- "| Step | Training Loss |\n",
- "|---|---|\n",
- "| 1 | 1.350100 |\n",
- "| 2 | 2.015800 |\n",
- "| 3 | 1.048700 |\n",
- "| 4 | 1.287700 |\n",
- "| 5 | 1.451200 |\n",
- "| 6 | 1.659900 |\n",
- "| 7 | 1.472300 |\n",
- "| 8 | 1.326700 |\n",
- "| 9 | 1.140000 |\n",
- "| 10 | 1.395300 |\n",
- "| 11 | 1.776400 |\n",
- "| 12 | 1.169100 |\n",
- "| 13 | 1.434700 |\n",
- "| 14 | 1.550400 |\n",
- "| 15 | 1.440400 |\n",
- "| 16 | 1.352100 |\n",
- "| 17 | 1.062800 |\n",
- "| 18 | 1.173400 |\n",
- "| 19 | 1.385300 |\n",
- "| 20 | 1.433300 |\n",
- "| 21 | 1.787800 |\n",
- "| 22 | 1.600200 |\n",
- "| 23 | 1.067800 |\n",
- "| 24 | 1.679300 |\n",
- "| 25 | 1.209900 |\n",
- "| 26 | 1.305200 |\n",
- "| 27 | 1.465300 |\n",
- "| 28 | 1.781800 |\n",
- "| 29 | 1.152400 |\n",
- "| 30 | 1.434400 |\n",
- "| 31 | 1.399300 |\n",
- "| 32 | 1.796300 |\n",
- "| 33 | 1.674500 |\n",
- "| 34 | 1.567600 |\n",
- "| 35 | 1.830000 |\n",
- "| 36 | 1.720200 |\n",
- "| 37 | 1.335800 |\n",
- "| 38 | 1.333000 |\n",
- "| 39 | 2.044900 |\n",
- "| 40 | 1.832200 |\n",
- "| 41 | 1.533900 |\n",
- "| 42 | 1.259900 |\n",
- "| 43 | 1.372300 |\n",
- "| 44 | 1.551600 |\n",
- "| 45 | 2.002400 |\n",
- "| 46 | 1.956100 |\n",
- "| 47 | 2.441900 |\n",
- "| 48 | 2.289100 |\n",
- "| 49 | 1.544500 |\n",
- "| 50 | 2.040300 |\n",
- "| 51 | 1.103800 |\n",
- "| 52 | 1.630800 |\n",
- "| 53 | 1.437900 |\n",
- "| 54 | 1.820900 |\n",
- "| 55 | 1.080300 |\n",
- "| 56 | 1.029200 |\n",
- "| 57 | 0.999400 |\n",
- "| 58 | 0.795900 |\n",
- "| 59 | 1.331600 |\n",
- "| 60 | 1.099500 |\n",
- "| 61 | 1.199000 |\n",
- "| 62 | 1.146000 |\n",
- "| 63 | 1.129000 |\n",
- "| 64 | 1.109500 |\n",
- "| 65 | 1.207000 |\n",
- "| 66 | 1.360600 |\n",
- "| 67 | 1.879000 |\n",
- "| 68 | 1.317200 |\n",
- "| 69 | 1.033300 |\n",
- "| 70 | 1.153400 |\n",
- "| 71 | 1.112400 |\n",
- "| 72 | 1.218400 |\n",
- "| 73 | 1.134600 |\n",
- "| 74 | 1.053200 |\n",
- "| 75 | 1.008900 |\n",
- "| 76 | 1.077000 |\n",
- "| 77 | 1.245000 |\n",
- "| 78 | 1.395900 |\n",
- "| 79 | 1.488800 |\n",
- "| 80 | 1.382500 |\n",
- "| 81 | 1.442200 |\n",
- "| 82 | 1.028500 |\n",
- "| 83 | 1.208500 |\n",
- "| 84 | 1.780200 |\n",
- "| 85 | 1.679300 |\n",
- "| 86 | 1.276600 |\n",
- "| 87 | 1.374600 |\n",
- "| 88 | 1.490000 |\n",
- "| 89 | 1.567100 |\n",
- "| 90 | 1.435000 |\n",
- "| 91 | 1.329800 |\n",
- "| 92 | 1.387600 |\n",
- "| 93 | 0.971400 |\n",
- "| 94 | 1.293800 |\n",
- "| 95 | 1.585900 |\n",
- "| 96 | 1.431700 |\n",
- "| 97 | 1.948900 |\n",
- "| 98 | 1.630500 |\n",
- "| 99 | 1.839100 |\n",
- "| 100 | 1.740900 |\n",
- "| 101 | 0.717200 |\n",
- "| 102 | 0.958100 |\n",
- "| 103 | 1.625900 |\n",
- "| 104 | 1.150000 |\n",
- "| 105 | 0.999200 |\n",
- "| 106 | 1.253100 |\n",
- "| 107 | 1.007600 |\n",
- "| 108 | 1.049700 |\n",
- "| 109 | 1.265900 |\n",
- "| 110 | 1.251300 |\n",
- "| 111 | 1.109500 |\n",
- "| 112 | 1.652500 |\n",
- "| 113 | 1.238000 |\n",
- "| 114 | 1.521300 |\n",
- "| 115 | 1.002400 |\n",
- "| 116 | 0.982400 |\n",
- "| 117 | 1.389300 |\n",
- "| 118 | 1.114900 |\n",
- "| 119 | 1.093900 |\n",
- "| 120 | 1.254200 |\n",
- "| 121 | 1.132300 |\n",
- "| 122 | 0.925300 |\n",
- "| 123 | 1.292700 |\n",
- "| 124 | 1.317600 |\n",
- "| 125 | 1.080400 |\n",
- "| 126 | 0.918800 |\n",
- "| 127 | 1.203400 |\n",
- "| 128 | 1.098800 |\n",
- "| 129 | 1.360800 |\n",
- "| 130 | 1.256900 |\n",
- "| 131 | 1.392600 |\n",
- "| 132 | 1.167600 |\n",
- "| 133 | 1.134900 |\n",
- "| 134 | 1.423700 |\n",
- "| 135 | 1.111200 |\n",
- "| 136 | 1.081600 |\n",
- "| 137 | 1.806000 |\n",
- "| 138 | 1.238800 |\n",
- "| 139 | 1.306800 |\n",
- "| 140 | 1.421900 |\n",
- "| 141 | 1.467300 |\n",
- "| 142 | 1.245100 |\n",
- "| 143 | 1.594200 |\n",
- "| 144 | 1.426000 |\n",
- "| 145 | 1.393800 |\n",
- "| 146 | 1.894400 |\n",
- "| 147 | 1.331200 |\n",
- "| 148 | 1.519400 |\n",
- "| 149 | 1.926300 |\n",
- "| 150 | 1.293200 |\n",
- "| 151 | 1.135100 |\n",
- "| 152 | 1.066700 |\n",
- "| 153 | 0.856900 |\n",
- "| 154 | 1.021500 |\n",
- "| 155 | 0.808800 |\n",
- "| 156 | 0.936300 |\n",
- "| 157 | 0.979700 |\n",
- "| 158 | 1.100200 |\n",
- "| 159 | 1.091400 |\n",
- "| 160 | 0.918800 |\n",
- "| 161 | 1.370800 |\n",
- "| 162 | 1.380300 |\n",
- "| 163 | 0.965300 |\n",
- "| 164 | 1.142400 |\n",
- "| 165 | 1.436400 |\n",
- "| 166 | 0.970400 |\n",
- "| 167 | 0.872600 |\n",
- "| 168 | 1.662500 |\n",
- "| 169 | 1.623500 |\n",
- "| 170 | 1.481700 |\n",
- "| 171 | 0.822300 |\n",
- "| 172 | 1.605500 |\n",
- "| 173 | 1.769800 |\n",
- "| 174 | 1.320100 |\n",
- "| 175 | 0.969300 |\n",
- "| 176 | 0.798700 |\n",
- "| 177 | 1.233200 |\n",
- "| 178 | 1.168500 |\n",
- "| 179 | 1.251400 |\n",
- "| 180 | 1.221500 |\n",
- "| 181 | 1.491100 |\n",
- "| 182 | 1.010200 |\n",
- "| 183 | 1.375500 |\n",
- "| 184 | 1.722900 |\n",
- "| 185 | 1.179300 |\n",
- "| 186 | 1.474400 |\n",
- "| 187 | 1.968200 |\n",
- "| 188 | 1.297200 |\n",
- "| 189 | 1.564500 |\n",
- "| 190 | 1.480700 |\n",
- "| 191 | 1.464700 |\n",
- "| 192 | 1.901400 |\n",
- "| 193 | 1.620100 |\n",
- "| 194 | 1.509000 |\n",
- "| 195 | 1.587000 |\n",
- "| 196 | 1.510000 |\n",
- "| 197 | 1.773900 |\n",
- "| 198 | 1.473200 |\n",
- "| 199 | 1.660400 |\n",
- "| 200 | 1.832600 |\n",
- "| 201 | 1.021400 |\n",
- "| 202 | 1.120400 |\n",
- "| 203 | 1.030200 |\n",
- "| 204 | 1.167500 |\n",
- "| 205 | 0.853200 |\n",
- "| 206 | 0.927000 |\n",
- "| 207 | 1.157400 |\n",
- "| 208 | 1.071600 |\n",
- "| 209 | 1.195400 |\n",
- "| 210 | 1.155800 |\n",
- "| 211 | 1.502300 |\n",
- "| 212 | 1.091600 |\n",
- "| 213 | 1.225200 |\n",
- "| 214 | 1.148900 |\n",
- "| 215 | 1.238200 |\n",
- "| 216 | 1.600200 |\n",
- "| 217 | 1.203600 |\n",
- "| 218 | 1.266200 |\n",
- "| 219 | 0.970900 |\n",
- "| 220 | 1.451000 |\n",
- "| 221 | 1.281300 |\n",
- "| 222 | 0.952500 |\n",
- "| 223 | 1.313800 |\n",
- "| 224 | 0.915700 |\n",
- "| 225 | 1.040000 |\n",
- "| 226 | 1.493800 |\n",
- "| 227 | 1.186400 |\n",
- "| 228 | 1.278700 |\n",
- "| 229 | 1.061100 |\n",
- "| 230 | 1.209000 |\n",
- "| 231 | 0.881400 |\n",
- "| 232 | 1.659300 |\n",
- "| 233 | 1.135200 |\n",
- "| 234 | 1.497800 |\n",
- "| 235 | 1.557500 |\n",
- "| 236 | 0.849200 |\n",
- "| 237 | 1.329200 |\n",
- "| 238 | 1.147700 |\n",
- "| 239 | 1.764600 |\n",
- "| 240 | 1.740000 |\n",
- "| 241 | 2.043700 |\n",
- "| 242 | 1.675000 |\n",
- "| 243 | 1.809600 |\n",
- "| 244 | 1.721400 |\n",
- "| 245 | 2.343300 |\n",
- "| 246 | 1.830400 |\n",
- "| 247 | 1.754400 |\n",
- "| 248 | 1.741900 |\n",
- "| 249 | 2.011000 |\n",
- "| 250 | 1.741700 |"
- ]
- },
- "metadata": {}
- }
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "# %load_ext tensorboard\n",
- "# %tensorboard --logdir results/runs"
- ],
- "metadata": {
- "id": "crj9svNe4hU5"
- },
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "source": [
- "# Ignore warnings\n",
- "logging.set_verbosity(logging.CRITICAL)\n",
- "\n",
- "# Run text generation pipeline with our next model\n",
- "prompt = \"What is a large language model?\"\n",
- "pipe = pipeline(task=\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\n",
- "result = pipe(f\"[INST] {prompt} [/INST]\")\n",
- "print(result[0]['generated_text'])"
- ],
- "metadata": {
- "id": "frlSLPin4IJ4",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "e5bf6b3a-f20e-49f7-e0b7-36f71ca207c1"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1270: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation )\n",
- " warnings.warn(\n",
- "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:31: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n",
- " warnings.warn(\"None of the inputs have requires_grad=True. Gradients will be None\")\n"
- ]
- },
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "[INST] What is a large language model? [/INST] A large language model is a type of artificial intelligence (AI) model that is trained on a large dataset of text to generate human-like language outputs. It is designed to be able to understand and generate text in a way that is similar to human language, and can be used for a wide range of applications such as chatbots, language translation, and text summarization.\n",
- "\n",
- "Large language models are typically trained using deep learning techniques, such as recurrent neural networks (RNNs) or transformer models, and are often based on pre-trained models such as BERT or RoBERTa. These models are trained on large datasets of text, such as books, articles, or websites, and are designed to learn the patterns and structures of language.\n",
- "\n",
- "Some examples of large language models include:\n",
- "\n",
- "* BERT (Bidirectional Encoder Representations from Transformers\n"
- ]
- }
- ]
- },
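A side note on the prompt format used above: Llama-2 chat models expect the user instruction wrapped in `[INST] ... [/INST]` tags, which the f-string reproduces by hand. A minimal sketch of the same formatting via the tokenizer's chat template, assuming this tokenizer ships a Llama-2 template:

```python
# Equivalent prompt construction through the chat template
# (assumes tokenizer.chat_template is defined for this model)
messages = [{"role": "user", "content": prompt}]
formatted = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
result = pipe(formatted)
print(result[0]['generated_text'])
```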
- {
- "cell_type": "code",
- "source": [
- "# Empty VRAM\n",
- "del model\n",
- "del pipe\n",
- "del trainer\n",
- "import gc\n",
- "gc.collect()\n",
- "gc.collect()"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "mkQCviG0Zta-",
- "outputId": "e7c4ab10-4039-4490-b7f0-6ea118bdd709"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "19965"
- ]
- },
- "metadata": {},
- "execution_count": 7
- }
- ]
- },
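One caveat about the cell above: `del` plus `gc.collect()` only drops the Python references, while PyTorch keeps the freed blocks in its CUDA caching allocator. A minimal sketch of a fuller flush before reloading the model in FP16, assuming a CUDA runtime is available:

```python
import gc
import torch

# Drop remaining references and collect garbage
gc.collect()

# Hand cached blocks back to the CUDA driver so the FP16 reload
# below actually has the VRAM available
torch.cuda.empty_cache()

# Optional sanity check on what is still allocated
print(f"Allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
```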
- {
- "cell_type": "code",
- "source": [
- "# Reload model in FP16 and merge it with LoRA weights\n",
- "base_model = AutoModelForCausalLM.from_pretrained(\n",
- " model_name,\n",
- " low_cpu_mem_usage=True,\n",
- " return_dict=True,\n",
- " torch_dtype=torch.float16,\n",
- " device_map=device_map,\n",
- ")\n",
- "model = PeftModel.from_pretrained(base_model, new_model)\n",
- "model = model.merge_and_unload()\n",
- "\n",
- "# Reload tokenizer to save it\n",
- "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
- "tokenizer.pad_token = tokenizer.eos_token\n",
- "tokenizer.padding_side = \"right\""
- ],
- "metadata": {
- "id": "QQn30cRtAZ-P",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 49,
- "referenced_widgets": [
- "051d193cd87f47c1971fb87544e1e615",
- "9d7247c119e642c5894f15ca6974ef3e",
- "a79c22bb34ec4f698a00752b47a6f631",
- "d95f3a3f26c6470d984542cdfd68bec1",
- "343e11c62a59448eb43bbc0c31bf5f11",
- "a153c96bd1fe4c48a41e9b9c7c00dd6e",
- "84da055d24694320843e13ad37438792",
- "e375632975904402baea46163e2eeca1",
- "95501d0b5a22407288f008bf8cc69726",
- "6aef866a6c474dfabb2140ded933c5aa",
- "d66fa096d442423c9447cbfbdc1aad8d"
- ]
- },
- "outputId": "1c5ef3c4-d107-4c43-9bd6-0ca72903db0e"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "051d193cd87f47c1971fb87544e1e615"
- }
- },
- "metadata": {}
- }
- ]
- },
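For intuition, `merge_and_unload()` folds each LoRA adapter back into its base weight matrix, so the merged model no longer needs the PEFT wrapper at inference time. A toy sketch of the underlying update (the shapes and `lora_alpha` below are illustrative, not the notebook's actual configuration):

```python
import torch

d_out, d_in, r, lora_alpha = 64, 64, 16, 16
W = torch.randn(d_out, d_in)     # frozen base weight
A = torch.randn(r, d_in) * 0.01  # LoRA down-projection
B = torch.zeros(d_out, r)        # LoRA up-projection (zero-initialized)

# Merging computes W' = W + (alpha / r) * B @ A for every adapted layer
W_merged = W + (lora_alpha / r) * (B @ A)
```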
- {
- "cell_type": "code",
- "source": [
- "!huggingface-cli login\n",
- "\n",
- "model.push_to_hub(new_model, use_temp_dir=False)\n",
- "tokenizer.push_to_hub(new_model, use_temp_dir=False)"
- ],
- "metadata": {
- "id": "x-xPb-_qB0dz",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 373,
- "referenced_widgets": [
- "c99aff4cfd664ae8a165a27bea0566c8",
- "e4b64cab6b7b418c8a2575ee26839039",
- "c3a4fedc73b3480089ef9d13381471ed",
- "bf722f71c61b4285bcbbf32fd619b3a6",
- "fd11a6148b704c5b9142c5e8de2d3b25",
- "f0bcdaf940d14ad796fc7ac46c8e1e64",
- "b6e821c974674f2290c354238d6c919c",
- "eeba50e8242c4753bfc0ea48e03f9078",
- "7a1f3340688d408092adade75f4baac4",
- "8c887ca9b0eb44fdb8608bf36b5db5c5",
- "e4698337e6b843afac706ab657ca6af9",
- "1af01f1f1aac42b8bff46fe4df8a59ad",
- "eee8731f316244eda5ff0765fd12bf85",
- "f135278e410f4b708435bb80fb630bcf",
- "2e6fc79bf5c149d6b0bc5c52e18debc7",
- "a4b0debc025444a59abd6953b3512c0d",
- "130120644beb48acbc038651459af43c",
- "bf77e97593a349718bdb5fd9bfd28fe3",
- "f7292741953e47699540ef8712fc0d8d",
- "9434350b1b9c4060812feb9ecbf63278",
- "b29647e268414329be56047e522e28b9",
- "27bb18a199ca47108c7a61e9c443de36",
- "33ebb868f3e846f6af1a1a2a8ad6a3cb",
- "1f73f8b4d4da4e74adc135f2a2f6ee65",
- "68da6e6e69c8419895bea2068760534e",
- "6dc1a868e08c4c3b8315116d2c46573b",
- "7a5d714c17374104bb6f5caaa5541c10",
- "1b6c59a51359453c926bfcddb3d0f0ea",
- "dac3669f18284161a58d52f26dffb761",
- "a3511f489f6d47cc8d404ab6f367b29f",
- "20670478612f4b1a8a5f23d71a2609a7",
- "b463153ec04749e38540389efa2981f7",
- "2bb3d36d248a48fba364f14d9e840306"
- ]
- },
- "outputId": "6ed9166c-5f92-4375-eca5-dbb247c0e13a"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\n",
- " _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\n",
- " _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n",
- " _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\n",
- " _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n",
- " _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\n",
- " \n",
- " To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n",
- "Token: \n",
- "Add token as git credential? (Y/n) n\n",
- "Token is valid (permission: write).\n",
- "Your token has been saved to /root/.cache/huggingface/token\n",
- "Login successful\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Upload 2 LFS files: 0%| | 0/2 [00:00, ?it/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "c99aff4cfd664ae8a165a27bea0566c8"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "pytorch_model-00001-of-00002.bin: 0%| | 0.00/9.98G [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "1af01f1f1aac42b8bff46fe4df8a59ad"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "pytorch_model-00002-of-00002.bin: 0%| | 0.00/3.50G [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "33ebb868f3e846f6af1a1a2a8ad6a3cb"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "CommitInfo(commit_url='https://huggingface.co/mlabonne/llama-2-7b-miniguanaco/commit/c81a32fd0b4d39e252326e639d63e75aa68c9a4a', commit_message='Upload tokenizer', commit_description='', oid='c81a32fd0b4d39e252326e639d63e75aa68c9a4a', pr_url=None, pr_revision=None, pr_num=None)"
- ]
- },
- "metadata": {},
- "execution_count": 10
- }
- ]
- }
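The interactive `huggingface-cli login` prompt can also be replaced with a programmatic login, which is more convenient for unattended runs. A minimal sketch, assuming the token is stored in an `HF_TOKEN` environment variable:

```python
import os
from huggingface_hub import login

# Use a token with write permission; read it from the environment
# instead of hard-coding it in the notebook
login(token=os.environ["HF_TOKEN"])

model.push_to_hub(new_model, use_temp_dir=False)
tokenizer.push_to_hub(new_model, use_temp_dir=False)
```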
- ]
-}
\ No newline at end of file
diff --git a/Fine_tune_a_Mistral_7b_model_with_DPO.ipynb b/Fine_tune_a_Mistral_7b_model_with_DPO.ipynb
deleted file mode 100644
index 788a43c..0000000
--- a/Fine_tune_a_Mistral_7b_model_with_DPO.ipynb
+++ /dev/null
@@ -1,757 +0,0 @@
-{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "provenance": [],
- "machine_shape": "hm",
- "gpuType": "A100",
- "authorship_tag": "ABX9TyNuIN7/ICiXCX5xELzN1Y3R",
- "include_colab_link": true
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "language_info": {
- "name": "python"
- },
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "22773c721a7c4221a9c14cd388461d4c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_6b54841f5de1482694c360095dae3039",
- "IPY_MODEL_448ccbc85e624ec3b3e71931a7ee4ff6",
- "IPY_MODEL_173769f6f465485f8848a11bf269850b"
- ],
- "layout": "IPY_MODEL_60978b9b4e8348f0a71ce3e35c73bcff"
- }
- },
- "6b54841f5de1482694c360095dae3039": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_6a38dcbaf4674b448329ac0a16587d2a",
- "placeholder": "β",
- "style": "IPY_MODEL_7eaeada2158e493189449af91f643553",
- "value": "Loading checkpoint shards: 100%"
- }
- },
- "448ccbc85e624ec3b3e71931a7ee4ff6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_6e32854952b340008edca0139d3471d6",
- "max": 3,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_db6d7cfcdade4b4baa213a5d0abc07d7",
- "value": 3
- }
- },
- "173769f6f465485f8848a11bf269850b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_9083029642744c43b7705532cbe0cf79",
- "placeholder": "β",
- "style": "IPY_MODEL_d028a98caa13425b907ceb513119006e",
- "value": " 3/3 [00:11<00:00, 2.89s/it]"
- }
- },
- "60978b9b4e8348f0a71ce3e35c73bcff": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "6a38dcbaf4674b448329ac0a16587d2a": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "7eaeada2158e493189449af91f643553": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "6e32854952b340008edca0139d3471d6": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "db6d7cfcdade4b4baa213a5d0abc07d7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "9083029642744c43b7705532cbe0cf79": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "d028a98caa13425b907ceb513119006e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- }
- }
- },
- "accelerator": "GPU"
- },
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "view-in-github",
- "colab_type": "text"
- },
- "source": [
- ""
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "# Fine-tune a Mistral-7b model with DPO\n",
- "\n",
- "> π£οΈ [Large Language Model Course](https://github.com/mlabonne/llm-course)\n",
- "\n",
- "β€οΈ Created by [@maximelabonne](https://twitter.com/maximelabonne)."
- ],
- "metadata": {
- "id": "Pa8905-YsHAn"
- }
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "_zIBL8IssExG"
- },
- "outputs": [],
- "source": [
- "!pip install -q datasets trl peft bitsandbytes sentencepiece wandb"
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "import os\n",
- "import gc\n",
- "import torch\n",
- "\n",
- "import transformers\n",
- "from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig\n",
- "from datasets import load_dataset\n",
- "from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training\n",
- "from trl import DPOTrainer\n",
- "import bitsandbytes as bnb\n",
- "from google.colab import userdata\n",
- "import wandb\n",
- "\n",
- "# Defined in the secrets tab in Google Colab\n",
- "hf_token = userdata.get('huggingface')\n",
- "wb_token = userdata.get('wandb')\n",
- "wandb.login(key=wb_token)\n",
- "\n",
- "model_name = \"teknium/OpenHermes-2.5-Mistral-7B\"\n",
- "new_model = \"NeuralHermes-2.5-Mistral-7B\""
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "YpdkZsMNylvp",
- "outputId": "6c2df234-1ce7-4cd2-a7e3-567e7536319f"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/trl/trainer/ppo_config.py:141: UserWarning: The `optimize_cuda_cache` arguement will be deprecated soon, please use `optimize_device_cache` instead.\n",
- " warnings.warn(\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mmlabonne\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m If you're specifying your api key in code, ensure this code is not shared publicly.\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.\n",
- "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n"
- ]
- }
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "## Format dataset"
- ],
- "metadata": {
- "id": "d8CvUgROUDw-"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "def chatml_format(example):\n",
- " # Format system\n",
- " if len(example['system']) > 0:\n",
- " message = {\"role\": \"system\", \"content\": example['system']}\n",
- " system = tokenizer.apply_chat_template([message], tokenize=False)\n",
- " else:\n",
- " system = \"\"\n",
- "\n",
- " # Format instruction\n",
- " message = {\"role\": \"user\", \"content\": example['question']}\n",
- " prompt = tokenizer.apply_chat_template([message], tokenize=False, add_generation_prompt=True)\n",
- "\n",
- " # Format chosen answer\n",
- " chosen = example['chosen'] + \"<|im_end|>\\n\"\n",
- "\n",
- " # Format rejected answer\n",
- " rejected = example['rejected'] + \"<|im_end|>\\n\"\n",
- "\n",
- " return {\n",
- " \"prompt\": system + prompt,\n",
- " \"chosen\": chosen,\n",
- " \"rejected\": rejected,\n",
- " }\n",
- "\n",
- "# Load dataset\n",
- "dataset = load_dataset(\"Intel/orca_dpo_pairs\")['train']\n",
- "\n",
- "# Save columns\n",
- "original_columns = dataset.column_names\n",
- "\n",
- "# Tokenizer\n",
- "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
- "tokenizer.pad_token = tokenizer.eos_token\n",
- "tokenizer.padding_side = \"left\"\n",
- "\n",
- "# Format dataset\n",
- "dataset = dataset.map(\n",
- " chatml_format,\n",
- " remove_columns=original_columns\n",
- ")\n",
- "\n",
- "# Print sample\n",
- "dataset[1]"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "MCD77GZ60DOT",
- "outputId": "c7c6773c-5545-4fee-bfa3-6fa6d69c0f3f"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
- "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
- ]
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "{'prompt': '<|im_start|>system\\nYou are an AI assistant. You will be given a task. You must generate a detailed and long answer.<|im_end|>\\n<|im_start|>user\\nGenerate an approximately fifteen-word sentence that describes all this data: Midsummer House eatType restaurant; Midsummer House food Chinese; Midsummer House priceRange moderate; Midsummer House customer rating 3 out of 5; Midsummer House near All Bar One<|im_end|>\\n<|im_start|>assistant\\n',\n",
- " 'chosen': 'Midsummer House is a moderately priced Chinese restaurant with a 3/5 customer rating, located near All Bar One.<|im_end|>\\n',\n",
- " 'rejected': ' Sure! Here\\'s a sentence that describes all the data you provided:\\n\\n\"Midsummer House is a moderately priced Chinese restaurant with a customer rating of 3 out of 5, located near All Bar One, offering a variety of delicious dishes.\"<|im_end|>\\n'}"
- ]
- },
- "metadata": {},
- "execution_count": 3
- }
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "## Train model with DPO"
- ],
- "metadata": {
- "id": "DeT5eUK_UJgK"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "# LoRA configuration\n",
- "peft_config = LoraConfig(\n",
- " r=16,\n",
- " lora_alpha=16,\n",
- " lora_dropout=0.05,\n",
- " bias=\"none\",\n",
- " task_type=\"CAUSAL_LM\",\n",
- " target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']\n",
- ")\n",
- "\n",
- "# Model to fine-tune\n",
- "model = AutoModelForCausalLM.from_pretrained(\n",
- " model_name,\n",
- " torch_dtype=torch.float16,\n",
- " load_in_4bit=True\n",
- ")\n",
- "model.config.use_cache = False\n",
- "\n",
- "# Training arguments\n",
- "training_args = TrainingArguments(\n",
- " per_device_train_batch_size=4,\n",
- " gradient_accumulation_steps=4,\n",
- " gradient_checkpointing=True,\n",
- " learning_rate=5e-5,\n",
- " lr_scheduler_type=\"cosine\",\n",
- " max_steps=200,\n",
- " save_strategy=\"no\",\n",
- " logging_steps=1,\n",
- " output_dir=new_model,\n",
- " optim=\"paged_adamw_32bit\",\n",
- " warmup_steps=100,\n",
- " bf16=True,\n",
- " report_to=\"wandb\",\n",
- ")\n",
- "\n",
- "# Create DPO trainer\n",
- "dpo_trainer = DPOTrainer(\n",
- " model,\n",
- " args=training_args,\n",
- " train_dataset=dataset,\n",
- " tokenizer=tokenizer,\n",
- " peft_config=peft_config,\n",
- " beta=0.1,\n",
- " max_prompt_length=1024,\n",
- " max_length=1536,\n",
- ")\n",
- "\n",
- "# Fine-tune model with DPO\n",
- "dpo_trainer.train()"
- ],
- "metadata": {
- "id": "rKPILNOLR-aK"
- },
- "execution_count": null,
- "outputs": []
- },
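For reference, the objective `DPOTrainer` optimizes: each preference pair contributes the log-sigmoid of the margin between the β-scaled log-ratios of the policy and reference models on the chosen and rejected answers. A minimal PyTorch sketch of the per-batch loss with the `beta=0.1` used above (an illustrative sketch, not TRL's actual implementation):

```python
import torch.nn.functional as F

def dpo_loss(policy_chosen_logps, policy_rejected_logps,
             ref_chosen_logps, ref_rejected_logps, beta=0.1):
    # Implicit rewards are beta-scaled log-ratios against the frozen reference
    chosen_rewards = beta * (policy_chosen_logps - ref_chosen_logps)
    rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)
    # Maximize the log-sigmoid of the reward margin (chosen over rejected)
    return -F.logsigmoid(chosen_rewards - rejected_rewards).mean()
```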
- {
- "cell_type": "markdown",
- "source": [
- "## Upload model"
- ],
- "metadata": {
- "id": "3LdhPpcrUM3H"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "# Save artifacts\n",
- "dpo_trainer.model.save_pretrained(\"final_checkpoint\")\n",
- "tokenizer.save_pretrained(\"final_checkpoint\")\n",
- "\n",
- "# Flush memory\n",
- "del dpo_trainer, model\n",
- "gc.collect()\n",
- "torch.cuda.empty_cache()\n",
- "\n",
- "# Reload model in FP16 (instead of NF4)\n",
- "base_model = AutoModelForCausalLM.from_pretrained(\n",
- " model_name,\n",
- " return_dict=True,\n",
- " torch_dtype=torch.float16,\n",
- ")\n",
- "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
- "\n",
- "# Merge base model with the adapter\n",
- "model = PeftModel.from_pretrained(base_model, \"final_checkpoint\")\n",
- "model = model.merge_and_unload()\n",
- "\n",
- "# Save model and tokenizer\n",
- "model.save_pretrained(new_model)\n",
- "tokenizer.save_pretrained(new_model)\n",
- "\n",
- "# Push them to the HF Hub\n",
- "model.push_to_hub(new_model, use_temp_dir=False, token=hf_token)\n",
- "tokenizer.push_to_hub(new_model, use_temp_dir=False, token=hf_token)"
- ],
- "metadata": {
- "id": "h7cIvxcTfBC4"
- },
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "markdown",
- "source": [
- "## Inference"
- ],
- "metadata": {
- "id": "G6EFsmS4UOgV"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "# Format prompt\n",
- "message = [\n",
- " {\"role\": \"system\", \"content\": \"You are a helpful assistant chatbot.\"},\n",
- " {\"role\": \"user\", \"content\": \"What is a Large Language Model?\"}\n",
- "]\n",
- "tokenizer = AutoTokenizer.from_pretrained(new_model)\n",
- "prompt = tokenizer.apply_chat_template(message, add_generation_prompt=True, tokenize=False)\n",
- "\n",
- "# Create pipeline\n",
- "pipeline = transformers.pipeline(\n",
- " \"text-generation\",\n",
- " model=new_model,\n",
- " tokenizer=tokenizer\n",
- ")\n",
- "\n",
- "# Generate text\n",
- "sequences = pipeline(\n",
- " prompt,\n",
- " do_sample=True,\n",
- " temperature=0.7,\n",
- " top_p=0.9,\n",
- " num_return_sequences=1,\n",
- " max_length=200,\n",
- ")\n",
- "print(sequences[0]['generated_text'])"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 251,
- "referenced_widgets": [
- "22773c721a7c4221a9c14cd388461d4c",
- "6b54841f5de1482694c360095dae3039",
- "448ccbc85e624ec3b3e71931a7ee4ff6",
- "173769f6f465485f8848a11bf269850b",
- "60978b9b4e8348f0a71ce3e35c73bcff",
- "6a38dcbaf4674b448329ac0a16587d2a",
- "7eaeada2158e493189449af91f643553",
- "6e32854952b340008edca0139d3471d6",
- "db6d7cfcdade4b4baa213a5d0abc07d7",
- "9083029642744c43b7705532cbe0cf79",
- "d028a98caa13425b907ceb513119006e"
- ]
- },
- "id": "LAEUZFjvlJOv",
- "outputId": "9b5720c7-49ef-45c7-e5a7-f38d64899b1e"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Loading checkpoint shards: 0%| | 0/3 [00:00, ?it/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "22773c721a7c4221a9c14cd388461d4c"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py:1473: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )\n",
- " warnings.warn(\n",
- "Setting `pad_token_id` to `eos_token_id`:32000 for open-end generation.\n"
- ]
- },
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "<|im_start|>system\n",
- "You are a helpful assistant chatbot.<|im_end|>\n",
- "<|im_start|>user\n",
- "What is a Large Language Model?<|im_end|>\n",
- "<|im_start|>assistant\n",
- "A large language model is a type of artificial intelligence (AI) system that has been trained on vast amounts of text data. These models are designed to understand and generate human language, allowing them to perform various natural language processing tasks, such as text generation, language translation, and question answering. Large language models typically use deep learning techniques, like recurrent neural networks (RNNs) or transformers, to learn patterns and relationships in the data, enabling them to generate coherent and contextually relevant responses. The size of these models, in terms of the number of parameters and the volume of data they are trained on, plays a significant role in their ability to comprehend and produce complex language structures.\n"
- ]
- }
- ]
- }
- ]
-}
\ No newline at end of file
diff --git a/Introduction_to_Weight_Quantization.ipynb b/Introduction_to_Weight_Quantization.ipynb
deleted file mode 100644
index 26225da..0000000
--- a/Introduction_to_Weight_Quantization.ipynb
+++ /dev/null
@@ -1,2822 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "view-in-github",
- "colab_type": "text"
- },
- "source": [
- "
"
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "# Introduction to Weight Quantization\n",
- "> Reducing the size of Large Language Models with 8-bit quantization\n",
- "\n",
- "β€οΈ Created by [@maximelabonne](https://twitter.com/maximelabonne).\n",
- "\n",
- "Companion notebook to execute the code from the following article: https://mlabonne.github.io/blog/intro_weight_quantization/"
- ],
- "metadata": {
- "id": "yG1VY-TJoxix"
- }
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "WMVwLxdUzlq2"
- },
- "outputs": [],
- "source": [
- "import torch\n",
- "\n",
- "def absmax_quantize(X):\n",
- " # Calculate scale\n",
- " scale = 127 / torch.max(torch.abs(X))\n",
- "\n",
- " # Quantize\n",
- " X_quant = (scale * X).round()\n",
- "\n",
- " # Dequantize\n",
- " X_dequant = X_quant / scale\n",
- "\n",
- " return X_quant.to(torch.int8), X_dequant"
- ]
- },
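A quick worked example of `absmax_quantize` on a toy tensor, to make the scale concrete (the values are chosen purely for illustration):

```python
import torch

X = torch.tensor([0.1, -0.4, 1.2])
# scale = 127 / max|X| = 127 / 1.2 ≈ 105.83
# 0.1 * 105.83 ≈ 10.58 -> 11, -0.4 * 105.83 ≈ -42.33 -> -42, 1.2 -> 127
X_quant, X_dequant = absmax_quantize(X)
print(X_quant)    # tensor([  11,  -42,  127], dtype=torch.int8)
print(X_dequant)  # ≈ tensor([ 0.1039, -0.3969,  1.2000])
```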
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "CE7XqWOR6oCa"
- },
- "outputs": [],
- "source": [
- "def zeropoint_quantize(X):\n",
- " # Calculate value range (denominator)\n",
- " x_range = torch.max(X) - torch.min(X)\n",
- " x_range = 1 if x_range == 0 else x_range\n",
- "\n",
- " # Calculate scale\n",
- " scale = 255 / x_range\n",
- "\n",
- " # Shift by zero-point\n",
- " zeropoint = (-scale * torch.min(X) - 128).round()\n",
- "\n",
- " # Scale and round the inputs\n",
- " X_quant = torch.clip((X * scale + zeropoint).round(), -128, 127)\n",
- "\n",
- " # Dequantize\n",
- " X_dequant = (X_quant - zeropoint) / scale\n",
- "\n",
- " return X_quant.to(torch.int8), X_dequant"
- ]
- },
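And the same toy tensor through `zeropoint_quantize`, showing how the asymmetric range shifts the mapping:

```python
X = torch.tensor([0.1, -0.4, 1.2])
# x_range = 1.2 - (-0.4) = 1.6, so scale = 255 / 1.6 = 159.375
# zeropoint = round(-159.375 * (-0.4) - 128) = round(-64.25) = -64
# 0.1 -> round(15.94 - 64) = -48; -0.4 -> -128; 1.2 -> 127
X_quant, X_dequant = zeropoint_quantize(X)
print(X_quant)    # tensor([ -48, -128,  127], dtype=torch.int8)
print(X_dequant)  # ≈ tensor([ 0.1004, -0.4016,  1.1984])
```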
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "lIYdn1woOS1n"
- },
- "outputs": [],
- "source": [
- "!pip install -q bitsandbytes>=0.39.0\n",
- "!pip install -q git+https://github.com/huggingface/accelerate.git\n",
- "!pip install -q git+https://github.com/huggingface/transformers.git"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 792,
- "referenced_widgets": [
- "e7e1636dbc8944c49b375286b8d89fe4",
- "ca714b7b6dd94fedb669ce39c6796bc4",
- "71616c99117145adaca155701522460b",
- "d7364ae105524cfba0eed4095273ed57",
- "eeb266ac1aba409184d9ac1fbd433e9c",
- "fd1a537d4c1c49b6972f7bad58a24417",
- "bb06d9a3f9f64821b7839ce777580c4b",
- "c53b9dc02f144a91aeca032b8b67259b",
- "e5a0a603b6ee48c8ab7e5f5bcb62c3ff",
- "15a5a3eb178348ef83cb7d02198636b3",
- "47b5db2b2caa48cca14ea162ed5e7bd3",
- "7784bd5d3f1b4ac890a711c01bd653cb",
- "7626f77200244f489150c2c4c6794e53",
- "1482e979e3ae4731b9b673b5a7db44ab",
- "d95190bba0364de99f04bb9adad173b5",
- "2f4124fead964700a4158cf1373bf74e",
- "6288899dd1c64a779d79e17095b8ea9d",
- "c013211a09134f42934ac041f86b5cfe",
- "85e096686a604059a0a61afacedac67b",
- "0a23f8cda4484b6ca80c14b6691cc56d",
- "dcb679577da0472c841ab985ef92618f",
- "c7438dd71b964dea809e9f4a95046439",
- "0e047a8a2a0e4337b17f3e8612044967",
- "69e9a5bb2972442b9895e70210274c8d",
- "768c433b555b43669de698cf9c738d79",
- "43a348d835cd4eb4936d8d89c6999de7",
- "715ab0815132494892001b7a15ebd9ed",
- "9290f81e9e5d4a39b28a3836d6472886",
- "fba8130749824daaa73c5890c773e900",
- "de639b2a33e34fc593136c7bb07da47b",
- "d804702825694057a213fbae380b94d6",
- "b1d0c88f5b87449380a29956147b867a",
- "38888cadff0a472e96c4925e2881a755",
- "681a9a78878945b7b6afb2d87b769146",
- "55add7bdffbe4ddea4fb7407aa61fbc8",
- "a3a20248b4e843249ec3a10d7c8e84ad",
- "e1ef7e3213a446a4815a84b8aab67576",
- "6a9c13356d424bd6bad9565a14f28f16",
- "c4f633e0dcb74c8992c482efc80ebe31",
- "d8f5dafc06ca4bf0b28a7101ebe7a07e",
- "d12f1b2228d444948862d94d769d0b0d",
- "216cb64c2b4a41eb8114176608f6a0ca",
- "0747780fac22461a8d8ef53dbb18ca39",
- "4fdb1280c19a4df6ba35a95abf9862f0",
- "cea83a47549a4ddb91eae020d1cd943c",
- "f1f75a95e2094ebb9d5a891447008a7b",
- "12a91a0d7ff94165a75640b285c08a52",
- "dd3fa44234334118918ef5c632ee65d1",
- "2463480560e14307864ba9743dc5d41d",
- "06d0c57030474d008ff4bbafa1e35695",
- "450cb6fd2bec48dfb73869cffd3d5c9f",
- "a893cd0d7cd74a53b0a6504af7580f65",
- "290836d667df420597ccbe2b934ca5ed",
- "1d57938333654dd49156e6d488688d13",
- "1ca5165f5f4443d5a20cf361b01922d1",
- "90cf2d52029d4392aaea970508506261",
- "c0c5a26685dc414f9a1295f077c81488",
- "82d9759ea89a44feb72b57ca67bc7f2a",
- "f8b92ab82eb64c57bafb1f37ef34af1b",
- "8d78e5c72f894275b504948461193b66",
- "77b9f13345b24caaa7afc3bd43e33eba",
- "57b079495f844af684c24bb1cd711bf0",
- "0227537a636e41cb93abc9e416015cce",
- "b45e6e8aa21f47c1821f2b24a2f46944",
- "8257dc1927514846b51511bc387a54db",
- "7e7d6bc9b8544e078003d2fe6c74dcef"
- ]
- },
- "id": "NTDg7uUOGBmS",
- "outputId": "cc48b090-31d1-41ae-ca5c-dbffcb67bcb6"
- },
- "outputs": [
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Downloading (β¦)lve/main/config.json: 0%| | 0.00/665 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "e7e1636dbc8944c49b375286b8d89fe4"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\n",
- "===================================BUG REPORT===================================\n",
- "Welcome to bitsandbytes. For bug reports, please run\n",
- "\n",
- "python -m bitsandbytes\n",
- "\n",
- " and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n",
- "================================================================================\n",
- "bin /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so\n",
- "CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching in backup paths...\n",
- "CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so.11.0\n",
- "CUDA SETUP: Highest compute capability among GPUs detected: 7.5\n",
- "CUDA SETUP: Detected CUDA version 118\n",
- "CUDA SETUP: Loading binary /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so...\n"
- ]
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: /usr/lib64-nvidia did not contain ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] as expected! Searching further paths...\n",
- " warn(msg)\n",
- "/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/sys/fs/cgroup/memory.events /var/colab/cgroup/jupyter-children/memory.events')}\n",
- " warn(msg)\n",
- "/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('http'), PosixPath('//172.28.0.1'), PosixPath('8013')}\n",
- " warn(msg)\n",
- "/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('--logtostderr --listen_host=172.28.0.12 --target_host=172.28.0.12 --tunnel_background_save_url=https'), PosixPath('//colab.research.google.com/tun/m/cc48301118ce562b961b3c22d803539adc1e0c19/gpu-t4-s-20b5bv2xvtu9a --tunnel_background_save_delay=10s --tunnel_periodic_background_save_frequency=30m0s --enable_output_coalescing=true --output_coalescing_required=true')}\n",
- " warn(msg)\n",
- "/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/env/python')}\n",
- " warn(msg)\n",
- "/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('module'), PosixPath('//ipykernel.pylab.backend_inline')}\n",
- " warn(msg)\n",
- "/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: Found duplicate ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] files: {PosixPath('/usr/local/cuda/lib64/libcudart.so.11.0'), PosixPath('/usr/local/cuda/lib64/libcudart.so')}.. We'll flip a coin and try one of these, in order to fail forward.\n",
- "Either way, this might cause trouble in the future:\n",
- "If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.\n",
- " warn(msg)\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Downloading model.safetensors: 0%| | 0.00/548M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "7784bd5d3f1b4ac890a711c01bd653cb"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Downloading (β¦)neration_config.json: 0%| | 0.00/124 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "0e047a8a2a0e4337b17f3e8612044967"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Downloading (β¦)olve/main/vocab.json: 0%| | 0.00/1.04M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "681a9a78878945b7b6afb2d87b769146"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Downloading (β¦)olve/main/merges.txt: 0%| | 0.00/456k [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "cea83a47549a4ddb91eae020d1cd943c"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Downloading (β¦)/main/tokenizer.json: 0%| | 0.00/1.36M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "90cf2d52029d4392aaea970508506261"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Model size: 510,342,192 bytes\n"
- ]
- }
- ],
- "source": [
- "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
- "import torch\n",
- "torch.manual_seed(0)\n",
- "\n",
- "# Set device to CPU for now\n",
- "device = 'cpu'\n",
- "\n",
- "# Load model and tokenizer\n",
- "model_id = 'gpt2'\n",
- "model = AutoModelForCausalLM.from_pretrained(model_id).to(device)\n",
- "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
- "\n",
- "# Print model size\n",
- "print(f\"Model size: {model.get_memory_footprint():,} bytes\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "YPI1EaimHyHm",
- "outputId": "977e9b34-9426-46a1-d6c2-da80884b7483"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Original weights:\n",
- "tensor([[-0.4738, -0.2614, -0.0978, ..., 0.0513, -0.0584, 0.0250],\n",
- " [ 0.0874, 0.1473, 0.2387, ..., -0.0525, -0.0113, -0.0156],\n",
- " [ 0.0039, 0.0695, 0.3668, ..., 0.1143, 0.0363, -0.0318],\n",
- " ...,\n",
- " [-0.2592, -0.0164, 0.1991, ..., 0.0095, -0.0516, 0.0319],\n",
- " [ 0.1517, 0.2170, 0.1043, ..., 0.0293, -0.0429, -0.0475],\n",
- " [-0.4100, -0.1924, -0.2400, ..., -0.0046, 0.0070, 0.0198]])\n",
- "\n",
- "Absmax quantized weights:\n",
- "tensor([[-21, -12, -4, ..., 2, -3, 1],\n",
- " [ 4, 7, 11, ..., -2, -1, -1],\n",
- " [ 0, 3, 16, ..., 5, 2, -1],\n",
- " ...,\n",
- " [-12, -1, 9, ..., 0, -2, 1],\n",
- " [ 7, 10, 5, ..., 1, -2, -2],\n",
- " [-18, -9, -11, ..., 0, 0, 1]], dtype=torch.int8)\n",
- "\n",
- "Zero-point quantized weights:\n",
- "tensor([[-20, -11, -3, ..., 3, -2, 2],\n",
- " [ 5, 8, 12, ..., -1, 0, 0],\n",
- " [ 1, 4, 18, ..., 6, 3, 0],\n",
- " ...,\n",
- " [-11, 0, 10, ..., 1, -1, 2],\n",
- " [ 8, 11, 6, ..., 2, -1, -1],\n",
- " [-18, -8, -10, ..., 1, 1, 2]], dtype=torch.int8)\n"
- ]
- }
- ],
- "source": [
- "# Extract weights of the first layer\n",
- "weights = model.transformer.h[0].attn.c_attn.weight.data\n",
- "print(\"Original weights:\")\n",
- "print(weights)\n",
- "\n",
- "# Quantize layer using absmax quantization\n",
- "weights_abs_quant, _ = absmax_quantize(weights)\n",
- "print(\"\\nAbsmax quantized weights:\")\n",
- "print(weights_abs_quant)\n",
- "\n",
- "# Quantize layer using absmax quantization\n",
- "weights_zp_quant, _ = zeropoint_quantize(weights)\n",
- "print(\"\\nZero-point quantized weights:\")\n",
- "print(weights_zp_quant)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "5i2N7HC9Mmn7"
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "from copy import deepcopy\n",
- "\n",
- "# Store original weights\n",
- "weights = [param.data.clone() for param in model.parameters()]\n",
- "\n",
- "# Create model to quantize\n",
- "model_abs = deepcopy(model)\n",
- "\n",
- "# Quantize all model weights\n",
- "weights_abs = []\n",
- "for param in model_abs.parameters():\n",
- " _, dequantized = absmax_quantize(param.data)\n",
- " param.data = dequantized\n",
- " weights_abs.append(dequantized)\n",
- "\n",
- "# Create model to quantize\n",
- "model_zp = deepcopy(model)\n",
- "\n",
- "# Quantize all model weights\n",
- "weights_zp = []\n",
- "for param in model_zp.parameters():\n",
- " _, dequantized = zeropoint_quantize(param.data)\n",
- " param.data = dequantized\n",
- " weights_zp.append(dequantized)"
- ]
- },
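One way to compare the two schemes is the reconstruction error between original and dequantized weights; a minimal sketch, reusing the `weights`, `weights_abs`, and `weights_zp` lists built above:

```python
import torch

def mean_abs_error(original, dequantized):
    # Average |w - w_hat| over every parameter tensor in the model
    total = sum(torch.abs(o - d).sum().item() for o, d in zip(original, dequantized))
    count = sum(o.numel() for o in original)
    return total / count

print(f"Absmax MAE:     {mean_abs_error(weights, weights_abs):.6f}")
print(f"Zero-point MAE: {mean_abs_error(weights, weights_zp):.6f}")
```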
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 1000
- },
- "id": "FlM_jWwpHh34",
- "outputId": "0705932d-ec5a-4cb1-cc92-08072c014ee7"
- },
- "outputs": [
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "
"
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "# Merge Large Language Models with mergekit\n",
- "> π£οΈ [Large Language Model Course](https://github.com/mlabonne/llm-course)\n",
- "\n",
- "β€οΈ Created by [@maximelabonne](https://twitter.com/maximelabonne).\n",
- "\n",
- "Model merging only requires a lot of RAM. With a free Google Colab account, you should be able to run it using a T4 GPU (VRAM offloading).\n",
- "\n",
- "Examples of merge configurations:\n",
- "\n",
- "### TIES-Merging\n",
- "\n",
- "```yaml\n",
- "models:\n",
- " - model: mistralai/Mistral-7B-v0.1\n",
- " # no parameters necessary for base model\n",
- " - model: OpenPipe/mistral-ft-optimized-1218\n",
- " parameters:\n",
- " density: 0.5\n",
- " weight: 0.5\n",
- " - model: mlabonne/NeuralHermes-2.5-Mistral-7B\n",
- " parameters:\n",
- " density: 0.5\n",
- " weight: 0.3\n",
- "merge_method: ties\n",
- "base_model: mistralai/Mistral-7B-v0.1\n",
- "parameters:\n",
- " normalize: true\n",
- "dtype: float16\n",
- "```\n",
- "\n",
- "You can find the final model on the Hugging Face Hub at [mlabonne/NeuralPipe-7B-ties](https://huggingface.co/mlabonne/NeuralPipe-7B-ties).\n",
- "\n",
- "### SLERP\n",
- "\n",
- "```yaml\n",
- "slices:\n",
- " - sources:\n",
- " - model: OpenPipe/mistral-ft-optimized-1218\n",
- " layer_range: [0, 32]\n",
- " - model: mlabonne/NeuralHermes-2.5-Mistral-7B\n",
- " layer_range: [0, 32]\n",
- "merge_method: slerp\n",
- "base_model: OpenPipe/mistral-ft-optimized-1218\n",
- "parameters:\n",
- " t:\n",
- " - filter: self_attn\n",
- " value: [0, 0.5, 0.3, 0.7, 1]\n",
- " - filter: mlp\n",
- " value: [1, 0.5, 0.7, 0.3, 0]\n",
- " - value: 0.5\n",
- "dtype: bfloat16\n",
- "```\n",
- "\n",
- "You can find the final model on the Hugging Face Hub at [mlabonne/NeuralPipe-7B-slerp](https://huggingface.co/mlabonne/NeuralPipe-7B-slerp).\n",
- "\n",
- "### Passthrough\n",
- "\n",
- "```yaml\n",
- "slices:\n",
- " - sources:\n",
- " - model: OpenPipe/mistral-ft-optimized-1218\n",
- " layer_range: [0, 32]\n",
- " - sources:\n",
- " - model: mlabonne/NeuralHermes-2.5-Mistral-7B\n",
- " layer_range: [24, 32]\n",
- "merge_method: passthrough\n",
- "dtype: bfloat16\n",
- "```\n",
- "\n",
- "You can find the final model on the Hugging Face Hub at [mlabonne/NeuralPipe-9B-merged](https://huggingface.co/mlabonne/NeuralPipe-9B-merged)."
- ],
- "metadata": {
- "id": "o12O0YjJvvLW"
- }
- },
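For intuition on the `slerp` merge method used below: spherical linear interpolation moves along the arc between two weight vectors rather than the straight chord, which better preserves their geometry when the vectors point in different directions. A minimal sketch of the formula, not mergekit's actual implementation:

```python
import torch

def slerp(t, w1, w2, eps=1e-8):
    # Angle between the two flattened weight tensors
    v1, v2 = w1.flatten(), w2.flatten()
    cos_omega = torch.clamp(torch.dot(v1, v2) / (v1.norm() * v2.norm() + eps), -1.0, 1.0)
    omega = torch.acos(cos_omega)
    so = torch.sin(omega)
    if so.abs() < eps:  # nearly parallel: fall back to linear interpolation
        return (1.0 - t) * w1 + t * w2
    return (torch.sin((1.0 - t) * omega) / so) * w1 + (torch.sin(t * omega) / so) * w2
```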
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "NPNPie5Eo3EZ"
- },
- "outputs": [],
- "source": [
- "!git clone https://github.com/cg123/mergekit.git\n",
- "!cd mergekit && pip install -q -e ."
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "import yaml\n",
- "\n",
- "MODEL_NAME = \"Marcoro14-7B-slerp\"\n",
- "yaml_config = \"\"\"\n",
- "slices:\n",
- " - sources:\n",
- " - model: AIDC-ai-business/Marcoroni-7B-v3\n",
- " layer_range: [0, 32]\n",
- " - model: EmbeddedLLM/Mistral-7B-Merge-14-v0.1\n",
- " layer_range: [0, 32]\n",
- "merge_method: slerp\n",
- "base_model: AIDC-ai-business/Marcoroni-7B-v3\n",
- "parameters:\n",
- " t:\n",
- " - filter: self_attn\n",
- " value: [0, 0.5, 0.3, 0.7, 1]\n",
- " - filter: mlp\n",
- " value: [1, 0.5, 0.7, 0.3, 0]\n",
- " - value: 0.5\n",
- "dtype: bfloat16\n",
- "\n",
- "\"\"\"\n",
- "\n",
- "# Save config as yaml file\n",
- "with open('config.yaml', 'w', encoding=\"utf-8\") as f:\n",
- " f.write(yaml_config)"
- ],
- "metadata": {
- "id": "LGd7jlfCpNcg"
- },
- "execution_count": null,
- "outputs": []
- },
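- {
- "cell_type": "markdown",
- "source": [
- "As an optional sanity check, the next cell re-parses `config.yaml` before launching the merge, so a YAML syntax error fails fast instead of partway through the merge. The asserted fields (`merge_method`, `slices`) come from the config defined above.\n"
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "source": [
- "# Optional: re-parse the config we just wrote to catch YAML errors early\n",
- "with open('config.yaml', encoding='utf-8') as f:\n",
- "    parsed = yaml.safe_load(f)\n",
- "\n",
- "assert parsed[\"merge_method\"] == \"slerp\"\n",
- "assert len(parsed[\"slices\"][0][\"sources\"]) == 2\n",
- "print(f\"Merging {len(parsed['slices'][0]['sources'])} models with {parsed['merge_method']}\")"
- ],
- "metadata": {},
- "execution_count": null,
- "outputs": []
- },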
- {
- "cell_type": "code",
- "source": [
- "# Merge models\n",
- "!mergekit-yaml config.yaml merge --copy-tokenizer --allow-crimes --out-shard-size 1B --lazy-unpickle"
- ],
- "metadata": {
- "id": "d5mYzDo1q96y"
- },
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "code",
- "source": [
- "!pip install -qU huggingface_hub\n",
- "\n",
- "from huggingface_hub import ModelCard, ModelCardData\n",
- "from jinja2 import Template\n",
- "\n",
- "username = \"mlabonne\"\n",
- "\n",
- "template_text = \"\"\"\n",
- "---\n",
- "license: apache-2.0\n",
- "tags:\n",
- "- merge\n",
- "- mergekit\n",
- "- lazymergekit\n",
- "{%- for model in models %}\n",
- "- {{ model }}\n",
- "{%- endfor %}\n",
- "---\n",
- "\n",
- "# {{ model_name }}\n",
- "\n",
- "{{ model_name }} is a merge of the following models using [mergekit](https://github.com/cg123/mergekit):\n",
- "\n",
- "{%- for model in models %}\n",
- "* [{{ model }}](https://huggingface.co/{{ model }})\n",
- "{%- endfor %}\n",
- "\n",
- "## π§© Configuration\n",
- "\n",
- "```yaml\n",
- "{{- yaml_config -}}\n",
- "```\n",
- "\"\"\"\n",
- "\n",
- "# Create a Jinja template object\n",
- "jinja_template = Template(template_text.strip())\n",
- "\n",
- "# Get list of models from config\n",
- "data = yaml.safe_load(yaml_config)\n",
- "if \"models\" in data:\n",
- " models = [data[\"models\"][i][\"model\"] for i in range(len(data[\"models\"])) if \"parameters\" in data[\"models\"][i]]\n",
- "elif \"parameters\" in data:\n",
- " models = [data[\"slices\"][0][\"sources\"][i][\"model\"] for i in range(len(data[\"slices\"][0][\"sources\"]))]\n",
- "elif \"slices\" in data:\n",
- " models = [data[\"slices\"][i][\"sources\"][0][\"model\"] for i in range(len(data[\"slices\"]))]\n",
- "else:\n",
- " raise Exception(\"No models or slices found in yaml config\")\n",
- "\n",
- "# Fill the template\n",
- "content = jinja_template.render(\n",
- " model_name=MODEL_NAME,\n",
- " models=models,\n",
- " yaml_config=yaml_config,\n",
- " username=username,\n",
- ")\n",
- "\n",
- "# Save the model card\n",
- "card = ModelCard(content)\n",
- "card.save('merge/README.md')"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "w-RNKev373lI",
- "outputId": "fccbbd1d-295f-4def-a398-f226813294bb"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\u001b[?25l \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m0.0/330.1 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91mβββββββββββββ\u001b[0m\u001b[91mβΈ\u001b[0m\u001b[90mββββββββββββββββββββββββββ\u001b[0m \u001b[32m112.6/330.1 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m330.1/330.1 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h"
- ]
- }
- ]
- },
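- {
- "cell_type": "markdown",
- "source": [
- "Optionally, you can smoke-test the merged model before uploading it. This is a minimal sketch using the standard `transformers` pipeline, assuming the merge finished in the `merge/` folder, the `accelerate` package is available for `device_map=\"auto\"`, and the runtime has enough memory to load a 7B model:\n"
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "source": [
- "from transformers import pipeline\n",
- "\n",
- "# Load the merged model from the local 'merge' folder\n",
- "pipe = pipeline(\"text-generation\", model=\"merge\", device_map=\"auto\")\n",
- "print(pipe(\"What is a large language model?\", max_new_tokens=64)[0][\"generated_text\"])"
- ],
- "metadata": {},
- "execution_count": null,
- "outputs": []
- },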
- {
- "cell_type": "code",
- "source": [
- "from google.colab import userdata\n",
- "from huggingface_hub import HfApi\n",
- "\n",
- "username = \"mlabonne\"\n",
- "\n",
- "# Defined in the secrets tab in Google Colab\n",
- "api = HfApi(token=userdata.get(\"HF_TOKEN\"))\n",
- "\n",
- "api.create_repo(\n",
- " repo_id=f\"{username}/{MODEL_NAME}\",\n",
- " repo_type=\"model\"\n",
- ")\n",
- "api.upload_folder(\n",
- " repo_id=f\"{username}/{MODEL_NAME}\",\n",
- " folder_path=\"merge\",\n",
- ")"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 164,
- "referenced_widgets": [
- "de24d272f2b842c5b01eedb3f536b810",
- "0c5dab2657b2473385a424d90f3d4664",
- "57efe36e546c473d8be34102f6ba9a58",
- "871bad1d905d4877a9eaa242cfd54c4e",
- "8951f6b2edf64464869391197c900f84",
- "69a61ad28d5141dcbaea44060bc5ebf7",
- "76c2fbf005ae4a5790edfeb499b387b7",
- "116964f328dc45d991d895d684ac1216",
- "1ecec5ba4424498082a5f64cf3d7faf8",
- "fc4edcef273b4e75894f4b512122de94",
- "ca2323b142f54998985d30481d5cfabe",
- "63626ac2d0f546188c07512a04c71417",
- "decd91747fd04ce39f3e2b733bc7f477",
- "7140e4c154424fcab846a71889e99ed2",
- "2264d8b75251425e94e635558af4e223",
- "c37478198217457cb30c6649203cf4dc",
- "4918769e4e984dfda924776e2373154c",
- "9b48494c94cf49b5835489d97f7a24c5",
- "6ed844da52fe466eb1c10c814489448c",
- "9c60efa02e80423e828628190dd13bc3",
- "0170e8cc57d94041956f7afbf2eef449",
- "220c2ba5f2524271b24fe049431a474c",
- "a6f99dd0662846f9a381d2d507a7b447",
- "900b9fcb70a84781bd5b4213df54626d",
- "0ea83f270e164795b64f23b143efb300",
- "318dcdeac8fb40f88fa60114f1c6a7c1",
- "af89cf715e0e4c5e9f59943a255394c1",
- "40e23e35299d45d499432b8f1a9bc924",
- "126b374e286747768ef7218454534640",
- "bdd26e54eed5477f99b135552e5f3450",
- "163a6fd878134e1eb5f193d1ebfff1c1",
- "953d7c014f76413c9805a2ef8c2c9356",
- "348879bf76d1471f9c79c1ec2dc07c1d",
- "8d54ae0d028b40e7b018454187db1a1c",
- "562353040be54593b23734390f49927c",
- "00cbebe6df7d4995913f20e39fc71b15",
- "aee3c563fdc54f9cb3ebc2630c84a9e6",
- "b74e307a751844ffab9f7f3df367774b",
- "8e6142e41f714fe9abe6a5bb72c071f9",
- "49cd1c5663404fb5a307c345e7e970c3",
- "920ef8e509d24ccda930f4c47eff158c",
- "c8828d61b26a47ac97a1541e14c00f62",
- "a3d7e352222647a99be79935b1ebd86a",
- "80666ef5f07641c482a23618a767791d"
- ]
- },
- "id": "ik0V0dF55gfU",
- "outputId": "9f6c605c-6b51-473d-c1fa-b103e9208785"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer.model: 0%| | 0.00/493k [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "de24d272f2b842c5b01eedb3f536b810"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "model-00001-of-00002.safetensors: 0%| | 0.00/9.94G [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "63626ac2d0f546188c07512a04c71417"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Upload 3 LFS files: 0%| | 0/3 [00:00, ?it/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "a6f99dd0662846f9a381d2d507a7b447"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "model-00002-of-00002.safetensors: 0%| | 0.00/8.03G [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "8d54ae0d028b40e7b018454187db1a1c"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": [
- "'https://huggingface.co/mlabonne/NeuralPipe-9B-merged/tree/main/'"
- ],
- "application/vnd.google.colaboratory.intrinsic+json": {
- "type": "string"
- }
- },
- "metadata": {},
- "execution_count": 5
- }
- ]
- }
- ]
-}
\ No newline at end of file
diff --git a/Quantize_Llama_2_models_using_GGUF_and_llama_cpp.ipynb b/Quantize_Llama_2_models_using_GGUF_and_llama_cpp.ipynb
deleted file mode 100644
index cb3fc28..0000000
--- a/Quantize_Llama_2_models_using_GGUF_and_llama_cpp.ipynb
+++ /dev/null
@@ -1,2218 +0,0 @@
-{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "provenance": [],
- "gpuType": "T4",
- "authorship_tag": "ABX9TyMohoDhmmKsuh9OLDHor3GB",
- "include_colab_link": true
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "language_info": {
- "name": "python"
- },
- "accelerator": "GPU",
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "c281b60e104f4c5da547bbdd7208d4bc": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "VBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "VBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "VBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_2e2fabac70484c1c8b16fa6ca8fd8537",
- "IPY_MODEL_bf53c635fa374420ad850eea22cd1e31",
- "IPY_MODEL_065d59126a734c1aa096ba40cd4a129f",
- "IPY_MODEL_e8855d5678a342f5a33171aa74d3b7bc"
- ],
- "layout": "IPY_MODEL_1c8a6b959f9c4443a92f58eff1b03077"
- }
- },
- "74b084c97f6f46d293a197bf9804460c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_9fb5726f91734b1da149784680dc9624",
- "placeholder": "β",
- "style": "IPY_MODEL_202a8eb11eda4e58942113fbeacfdc3d",
- "value": "
Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. "
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "# Quantize Llama 2 models using GGUF and llama.cpp\n",
- "> π£οΈ [Large Language Model Course](https://github.com/mlabonne/llm-course)\n",
- "\n",
- "β€οΈ Created by [@maximelabonne](https://twitter.com/maximelabonne).\n",
- "\n",
- "## Usage\n",
- "\n",
- "* `MODEL_ID`: The ID of the model to quantize (e.g., `mlabonne/EvolCodeLlama-7b`).\n",
- "* `QUANTIZATION_METHOD`: The quantization method to use.\n",
- "\n",
- "## Quantization methods\n",
- "\n",
- "The names of the quantization methods follow the naming convention: \"q\" + the number of bits + the variant used (detailed below). Here is a list of all the possible quant methods and their corresponding use cases, based on model cards made by [TheBloke](https://huggingface.co/TheBloke/):\n",
- "\n",
- "* `q2_k`: Uses Q4_K for the attention.vw and feed_forward.w2 tensors, Q2_K for the other tensors.\n",
- "* `q3_k_l`: Uses Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K\n",
- "* `q3_k_m`: Uses Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K\n",
- "* `q3_k_s`: Uses Q3_K for all tensors\n",
- "* `q4_0`: Original quant method, 4-bit.\n",
- "* `q4_1`: Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.\n",
- "* `q4_k_m`: Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K\n",
- "* `q4_k_s`: Uses Q4_K for all tensors\n",
- "* `q5_0`: Higher accuracy, higher resource usage and slower inference.\n",
- "* `q5_1`: Even higher accuracy, resource usage and slower inference.\n",
- "* `q5_k_m`: Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K\n",
- "* `q5_k_s`: Uses Q5_K for all tensors\n",
- "* `q6_k`: Uses Q8_K for all tensors\n",
- "* `q8_0`: Almost indistinguishable from float16. High resource use and slow. Not recommended for most users.\n",
- "\n",
- "As a rule of thumb, **I recommend using Q5_K_M** as it preserves most of the model's performance. Alternatively, you can use Q4_K_M if you want to save some memory. In general, K_M versions are better than K_S versions. I cannot recommend Q2_K or Q3_* versions, as they drastically decrease model performance."
- ],
- "metadata": {
- "id": "8y_Rk94LzG7I"
- }
- },
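- {
- "cell_type": "markdown",
- "source": [
- "After running the quantization cell below, the following optional sketch lists the GGUF files it produced and their sizes, which makes the size trade-off between methods concrete (`MODEL_NAME` is defined in that cell):\n"
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "source": [
- "import glob\n",
- "import os\n",
- "\n",
- "# List the quantized GGUF files and their sizes in GB\n",
- "for path in sorted(glob.glob(f\"{MODEL_NAME}/*.gguf\")):\n",
- "    print(f\"{path}: {os.path.getsize(path) / 1024**3:.2f} GB\")"
- ],
- "metadata": {},
- "execution_count": null,
- "outputs": []
- },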
- {
- "cell_type": "code",
- "source": [
- "# Variables\n",
- "MODEL_ID = \"mlabonne/EvolCodeLlama-7b\"\n",
- "QUANTIZATION_METHODS = [\"q4_k_m\", \"q5_k_m\"]\n",
- "\n",
- "# Constants\n",
- "MODEL_NAME = MODEL_ID.split('/')[-1]\n",
- "\n",
- "# Install llama.cpp\n",
- "!git clone https://github.com/ggerganov/llama.cpp\n",
- "!cd llama.cpp && git pull && make clean && LLAMA_CUBLAS=1 make\n",
- "!pip install -r llama.cpp/requirements.txt\n",
- "\n",
- "# Download model\n",
- "!git lfs install\n",
- "!git clone https://huggingface.co/{MODEL_ID}\n",
- "\n",
- "# Convert to fp16\n",
- "fp16 = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.fp16.bin\"\n",
- "!python llama.cpp/convert.py {MODEL_NAME} --outtype f16 --outfile {fp16}\n",
- "\n",
- "# Quantize the model for each method in the QUANTIZATION_METHODS list\n",
- "for method in QUANTIZATION_METHODS:\n",
- " qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{method.upper()}.gguf\"\n",
- " !./llama.cpp/quantize {fp16} {qtype} {method}"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "fD24jJxq7t3k",
- "outputId": "94954934-0829-44e9-a5e5-262c17e162d0"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "ggml_init_cublas: found 1 CUDA devices:\n",
- " Device 0: Tesla T4, compute capability 7.5\n",
- "main: build = 1100 (dd0dc36)\n",
- "main: quantizing 'EvolCodeLlama-7b/evolcodellama-7b.gguf.fp16.bin' to 'EvolCodeLlama-7b/evolcodellama-7b.gguf.q4_k_s.bin' as Q4_K_S\n",
- "llama_model_loader: loaded meta data with 16 key-value pairs and 291 tensors from EvolCodeLlama-7b/evolcodellama-7b.gguf.fp16.bin (version GGUF V1 (support until nov 2023))\n",
- "llama_model_loader: - tensor 0: token_embd.weight f16 [ 4096, 32016, 1, 1 ]\n",
- "llama_model_loader: - tensor 1: blk.0.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 2: blk.0.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 3: blk.0.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 4: blk.0.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 5: blk.0.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 6: blk.0.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 7: blk.0.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 8: blk.0.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 9: blk.0.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 10: blk.1.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 11: blk.1.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 12: blk.1.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 13: blk.1.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 14: blk.1.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 15: blk.1.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 16: blk.1.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 17: blk.1.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 18: blk.1.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 19: blk.2.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 20: blk.2.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 21: blk.2.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 22: blk.2.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 23: blk.2.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 24: blk.2.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 25: blk.2.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 26: blk.2.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 27: blk.2.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 28: blk.3.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 29: blk.3.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 30: blk.3.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 31: blk.3.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 32: blk.3.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 33: blk.3.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 34: blk.3.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 35: blk.3.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 36: blk.3.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 37: blk.4.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 38: blk.4.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 39: blk.4.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 40: blk.4.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 41: blk.4.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 42: blk.4.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 43: blk.4.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 44: blk.4.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 45: blk.4.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 46: blk.5.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 47: blk.5.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 48: blk.5.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 49: blk.5.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 50: blk.5.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 51: blk.5.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 52: blk.5.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 53: blk.5.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 54: blk.5.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 55: blk.6.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 56: blk.6.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 57: blk.6.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 58: blk.6.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 59: blk.6.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 60: blk.6.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 61: blk.6.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 62: blk.6.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 63: blk.6.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 64: blk.7.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 65: blk.7.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 66: blk.7.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 67: blk.7.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 68: blk.7.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 69: blk.7.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 70: blk.7.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 71: blk.7.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 72: blk.7.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 73: blk.8.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 74: blk.8.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 75: blk.8.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 76: blk.8.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 77: blk.8.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 78: blk.8.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 79: blk.8.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 80: blk.8.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 81: blk.8.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 82: blk.9.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 83: blk.9.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 84: blk.9.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 85: blk.9.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 86: blk.9.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 87: blk.9.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 88: blk.9.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 89: blk.9.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 90: blk.9.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 91: blk.10.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 92: blk.10.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 93: blk.10.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 94: blk.10.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 95: blk.10.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 96: blk.10.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 97: blk.10.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 98: blk.10.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 99: blk.10.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 100: blk.11.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 101: blk.11.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 102: blk.11.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 103: blk.11.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 104: blk.11.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 105: blk.11.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 106: blk.11.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 107: blk.11.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 108: blk.11.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 109: blk.12.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 110: blk.12.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 111: blk.12.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 112: blk.12.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 113: blk.12.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 114: blk.12.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 115: blk.12.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 116: blk.12.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 117: blk.12.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 118: blk.13.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 119: blk.13.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 120: blk.13.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 121: blk.13.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 122: blk.13.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 123: blk.13.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 124: blk.13.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 125: blk.13.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 126: blk.13.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 127: blk.14.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 128: blk.14.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 129: blk.14.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 130: blk.14.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 131: blk.14.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 132: blk.14.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 133: blk.14.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 134: blk.14.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 135: blk.14.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 136: blk.15.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 137: blk.15.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 138: blk.15.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 139: blk.15.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 140: blk.15.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 141: blk.15.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 142: blk.15.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 143: blk.15.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 144: blk.15.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 145: blk.16.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 146: blk.16.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 147: blk.16.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 148: blk.16.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 149: blk.16.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 150: blk.16.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 151: blk.16.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 152: blk.16.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 153: blk.16.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 154: blk.17.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 155: blk.17.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 156: blk.17.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 157: blk.17.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 158: blk.17.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 159: blk.17.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 160: blk.17.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 161: blk.17.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 162: blk.17.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 163: blk.18.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 164: blk.18.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 165: blk.18.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 166: blk.18.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 167: blk.18.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 168: blk.18.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 169: blk.18.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 170: blk.18.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 171: blk.18.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 172: blk.19.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 173: blk.19.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 174: blk.19.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 175: blk.19.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 176: blk.19.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 177: blk.19.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 178: blk.19.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 179: blk.19.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 180: blk.19.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 181: blk.20.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 182: blk.20.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 183: blk.20.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 184: blk.20.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 185: blk.20.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 186: blk.20.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 187: blk.20.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 188: blk.20.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 189: blk.20.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 190: blk.21.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 191: blk.21.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 192: blk.21.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 193: blk.21.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 194: blk.21.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 195: blk.21.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 196: blk.21.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 197: blk.21.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 198: blk.21.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 199: blk.22.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 200: blk.22.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 201: blk.22.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 202: blk.22.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 203: blk.22.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 204: blk.22.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 205: blk.22.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 206: blk.22.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 207: blk.22.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 208: blk.23.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 209: blk.23.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 210: blk.23.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 211: blk.23.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 212: blk.23.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 213: blk.23.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 214: blk.23.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 215: blk.23.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 216: blk.23.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 217: blk.24.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 218: blk.24.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 219: blk.24.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 220: blk.24.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 221: blk.24.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 222: blk.24.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 223: blk.24.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 224: blk.24.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 225: blk.24.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 226: blk.25.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 227: blk.25.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 228: blk.25.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 229: blk.25.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 230: blk.25.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 231: blk.25.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 232: blk.25.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 233: blk.25.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 234: blk.25.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 235: blk.26.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 236: blk.26.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 237: blk.26.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 238: blk.26.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 239: blk.26.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 240: blk.26.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 241: blk.26.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 242: blk.26.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 243: blk.26.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 244: blk.27.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 245: blk.27.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 246: blk.27.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 247: blk.27.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 248: blk.27.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 249: blk.27.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 250: blk.27.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 251: blk.27.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 252: blk.27.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 253: blk.28.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 254: blk.28.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 255: blk.28.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 256: blk.28.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 257: blk.28.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 258: blk.28.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 259: blk.28.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 260: blk.28.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 261: blk.28.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 262: blk.29.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 263: blk.29.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 264: blk.29.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 265: blk.29.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 266: blk.29.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 267: blk.29.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 268: blk.29.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 269: blk.29.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 270: blk.29.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 271: blk.30.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 272: blk.30.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 273: blk.30.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 274: blk.30.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 275: blk.30.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 276: blk.30.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 277: blk.30.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 278: blk.30.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 279: blk.30.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 280: blk.31.attn_q.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 281: blk.31.attn_k.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 282: blk.31.attn_v.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 283: blk.31.attn_output.weight f16 [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 284: blk.31.ffn_gate.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 285: blk.31.ffn_up.weight f16 [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 286: blk.31.ffn_down.weight f16 [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 287: blk.31.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 288: blk.31.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 289: output_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 290: output.weight f16 [ 4096, 32016, 1, 1 ]\n",
- "llama_model_loader: - kv 0: general.architecture str \n",
- "llama_model_loader: - kv 1: general.name str \n",
- "llama_model_loader: - kv 2: llama.context_length u32 \n",
- "llama_model_loader: - kv 3: llama.embedding_length u32 \n",
- "llama_model_loader: - kv 4: llama.block_count u32 \n",
- "llama_model_loader: - kv 5: llama.feed_forward_length u32 \n",
- "llama_model_loader: - kv 6: llama.rope.dimension_count u32 \n",
- "llama_model_loader: - kv 7: llama.attention.head_count u32 \n",
- "llama_model_loader: - kv 8: llama.attention.head_count_kv u32 \n",
- "llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 \n",
- "llama_model_loader: - kv 10: llama.rope.freq_base f32 \n",
- "llama_model_loader: - kv 11: general.file_type u32 \n",
- "llama_model_loader: - kv 12: tokenizer.ggml.model str \n",
- "llama_model_loader: - kv 13: tokenizer.ggml.tokens arr \n",
- "llama_model_loader: - kv 14: tokenizer.ggml.scores arr \n",
- "llama_model_loader: - kv 15: tokenizer.ggml.token_type arr \n",
- "llama_model_loader: - type f32: 65 tensors\n",
- "llama_model_loader: - type f16: 226 tensors\n",
- "llama_model_quantize_internal: meta size = 741408 bytes\n",
- "[ 1/ 291] token_embd.weight - [ 4096, 32016, 1, 1], type = f16, quantizing to q4_K .. size = 250.12 MB -> 70.35 MB | hist: \n",
- "[ 2/ 291] blk.0.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 3/ 291] blk.0.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 4/ 291] blk.0.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MB -> 11.00 MB | hist: \n",
- "[ 5/ 291] blk.0.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 6/ 291] blk.0.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 7/ 291] blk.0.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 8/ 291] blk.0.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MB -> 29.56 MB | hist: \n",
- "[ 9/ 291] blk.0.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 10/ 291] blk.0.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 11/ 291] blk.1.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 12/ 291] blk.1.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 13/ 291] blk.1.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MB -> 11.00 MB | hist: \n",
- "[ 14/ 291] blk.1.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 15/ 291] blk.1.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 16/ 291] blk.1.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 17/ 291] blk.1.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MB -> 29.56 MB | hist: \n",
- "[ 18/ 291] blk.1.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 19/ 291] blk.1.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 20/ 291] blk.2.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 21/ 291] blk.2.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 22/ 291] blk.2.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MB -> 11.00 MB | hist: \n",
- "[ 23/ 291] blk.2.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 24/ 291] blk.2.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 25/ 291] blk.2.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 26/ 291] blk.2.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MB -> 29.56 MB | hist: \n",
- "[ 27/ 291] blk.2.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 28/ 291] blk.2.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 29/ 291] blk.3.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 30/ 291] blk.3.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 31/ 291] blk.3.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 32.00 MB -> 11.00 MB | hist: \n",
- "[ 32/ 291] blk.3.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 33/ 291] blk.3.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 34/ 291] blk.3.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 35/ 291] blk.3.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q5_K .. size = 86.00 MB -> 29.56 MB | hist: \n",
- "[ 36/ 291] blk.3.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 37/ 291] blk.3.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 38/ 291] blk.4.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 39/ 291] blk.4.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 40/ 291] blk.4.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 41/ 291] blk.4.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 42/ 291] blk.4.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 43/ 291] blk.4.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 44/ 291] blk.4.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 45/ 291] blk.4.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 46/ 291] blk.4.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 47/ 291] blk.5.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 48/ 291] blk.5.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 49/ 291] blk.5.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 50/ 291] blk.5.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 51/ 291] blk.5.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 52/ 291] blk.5.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 53/ 291] blk.5.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 54/ 291] blk.5.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 55/ 291] blk.5.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 56/ 291] blk.6.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 57/ 291] blk.6.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 58/ 291] blk.6.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 59/ 291] blk.6.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 60/ 291] blk.6.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 61/ 291] blk.6.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 62/ 291] blk.6.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 63/ 291] blk.6.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 64/ 291] blk.6.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 65/ 291] blk.7.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 66/ 291] blk.7.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 67/ 291] blk.7.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 68/ 291] blk.7.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 69/ 291] blk.7.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 70/ 291] blk.7.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 71/ 291] blk.7.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 72/ 291] blk.7.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 73/ 291] blk.7.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 74/ 291] blk.8.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 75/ 291] blk.8.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 76/ 291] blk.8.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 77/ 291] blk.8.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 78/ 291] blk.8.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 79/ 291] blk.8.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 80/ 291] blk.8.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 81/ 291] blk.8.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 82/ 291] blk.8.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 83/ 291] blk.9.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 84/ 291] blk.9.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 85/ 291] blk.9.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 86/ 291] blk.9.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 87/ 291] blk.9.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 88/ 291] blk.9.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 89/ 291] blk.9.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 90/ 291] blk.9.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 91/ 291] blk.9.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 92/ 291] blk.10.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 93/ 291] blk.10.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 94/ 291] blk.10.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 95/ 291] blk.10.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 96/ 291] blk.10.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 97/ 291] blk.10.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 98/ 291] blk.10.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 99/ 291] blk.10.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 100/ 291] blk.10.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 101/ 291] blk.11.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 102/ 291] blk.11.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 103/ 291] blk.11.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 104/ 291] blk.11.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 105/ 291] blk.11.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 106/ 291] blk.11.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 107/ 291] blk.11.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 108/ 291] blk.11.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 109/ 291] blk.11.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 110/ 291] blk.12.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 111/ 291] blk.12.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 112/ 291] blk.12.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 113/ 291] blk.12.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 114/ 291] blk.12.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 115/ 291] blk.12.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 116/ 291] blk.12.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 117/ 291] blk.12.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 118/ 291] blk.12.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 119/ 291] blk.13.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 120/ 291] blk.13.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 121/ 291] blk.13.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 122/ 291] blk.13.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 123/ 291] blk.13.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 124/ 291] blk.13.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 125/ 291] blk.13.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 126/ 291] blk.13.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 127/ 291] blk.13.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 128/ 291] blk.14.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 129/ 291] blk.14.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 130/ 291] blk.14.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 131/ 291] blk.14.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 132/ 291] blk.14.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 133/ 291] blk.14.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 134/ 291] blk.14.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 135/ 291] blk.14.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 136/ 291] blk.14.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 137/ 291] blk.15.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 138/ 291] blk.15.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 139/ 291] blk.15.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 140/ 291] blk.15.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 141/ 291] blk.15.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 142/ 291] blk.15.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 143/ 291] blk.15.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 144/ 291] blk.15.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 145/ 291] blk.15.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 146/ 291] blk.16.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 147/ 291] blk.16.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 148/ 291] blk.16.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 149/ 291] blk.16.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 150/ 291] blk.16.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 151/ 291] blk.16.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 152/ 291] blk.16.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 153/ 291] blk.16.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 154/ 291] blk.16.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 155/ 291] blk.17.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 156/ 291] blk.17.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 157/ 291] blk.17.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 158/ 291] blk.17.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 159/ 291] blk.17.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 160/ 291] blk.17.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 161/ 291] blk.17.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 162/ 291] blk.17.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 163/ 291] blk.17.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 164/ 291] blk.18.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 165/ 291] blk.18.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 166/ 291] blk.18.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 167/ 291] blk.18.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 168/ 291] blk.18.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 169/ 291] blk.18.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 170/ 291] blk.18.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 171/ 291] blk.18.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 172/ 291] blk.18.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 173/ 291] blk.19.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 174/ 291] blk.19.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 175/ 291] blk.19.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 176/ 291] blk.19.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 177/ 291] blk.19.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 178/ 291] blk.19.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 179/ 291] blk.19.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 180/ 291] blk.19.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 181/ 291] blk.19.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 182/ 291] blk.20.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 183/ 291] blk.20.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 184/ 291] blk.20.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 185/ 291] blk.20.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 186/ 291] blk.20.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 187/ 291] blk.20.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 188/ 291] blk.20.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 189/ 291] blk.20.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 190/ 291] blk.20.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 191/ 291] blk.21.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 192/ 291] blk.21.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 193/ 291] blk.21.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 194/ 291] blk.21.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 195/ 291] blk.21.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 196/ 291] blk.21.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 197/ 291] blk.21.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 198/ 291] blk.21.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 199/ 291] blk.21.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 200/ 291] blk.22.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 201/ 291] blk.22.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 202/ 291] blk.22.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 203/ 291] blk.22.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 204/ 291] blk.22.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 205/ 291] blk.22.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 206/ 291] blk.22.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 207/ 291] blk.22.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 208/ 291] blk.22.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 209/ 291] blk.23.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 210/ 291] blk.23.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 211/ 291] blk.23.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 212/ 291] blk.23.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 213/ 291] blk.23.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 214/ 291] blk.23.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 215/ 291] blk.23.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 216/ 291] blk.23.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 217/ 291] blk.23.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 218/ 291] blk.24.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 219/ 291] blk.24.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 220/ 291] blk.24.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 221/ 291] blk.24.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 222/ 291] blk.24.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 223/ 291] blk.24.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 224/ 291] blk.24.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 225/ 291] blk.24.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 226/ 291] blk.24.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 227/ 291] blk.25.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 228/ 291] blk.25.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 229/ 291] blk.25.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 230/ 291] blk.25.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 231/ 291] blk.25.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 232/ 291] blk.25.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 233/ 291] blk.25.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 234/ 291] blk.25.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 235/ 291] blk.25.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 236/ 291] blk.26.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 237/ 291] blk.26.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 238/ 291] blk.26.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 239/ 291] blk.26.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 240/ 291] blk.26.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 241/ 291] blk.26.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 242/ 291] blk.26.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 243/ 291] blk.26.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 244/ 291] blk.26.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 245/ 291] blk.27.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 246/ 291] blk.27.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 247/ 291] blk.27.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 248/ 291] blk.27.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 249/ 291] blk.27.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 250/ 291] blk.27.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 251/ 291] blk.27.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 252/ 291] blk.27.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 253/ 291] blk.27.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 254/ 291] blk.28.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 255/ 291] blk.28.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 256/ 291] blk.28.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 257/ 291] blk.28.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 258/ 291] blk.28.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 259/ 291] blk.28.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 260/ 291] blk.28.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 261/ 291] blk.28.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 262/ 291] blk.28.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 263/ 291] blk.29.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 264/ 291] blk.29.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 265/ 291] blk.29.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 266/ 291] blk.29.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 267/ 291] blk.29.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 268/ 291] blk.29.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 269/ 291] blk.29.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 270/ 291] blk.29.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 271/ 291] blk.29.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 272/ 291] blk.30.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 273/ 291] blk.30.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 274/ 291] blk.30.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 275/ 291] blk.30.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 276/ 291] blk.30.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 277/ 291] blk.30.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 278/ 291] blk.30.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 279/ 291] blk.30.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 280/ 291] blk.30.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 281/ 291] blk.31.attn_q.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 282/ 291] blk.31.attn_k.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 283/ 291] blk.31.attn_v.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 284/ 291] blk.31.attn_output.weight - [ 4096, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 32.00 MB -> 9.00 MB | hist: \n",
- "[ 285/ 291] blk.31.ffn_gate.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 286/ 291] blk.31.ffn_up.weight - [ 4096, 11008, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 287/ 291] blk.31.ffn_down.weight - [11008, 4096, 1, 1], type = f16, quantizing to q4_K .. size = 86.00 MB -> 24.19 MB | hist: \n",
- "[ 288/ 291] blk.31.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 289/ 291] blk.31.ffn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 290/ 291] output_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MB\n",
- "[ 291/ 291] output.weight - [ 4096, 32016, 1, 1], type = f16, quantizing to q6_K .. size = 250.12 MB -> 102.59 MB | hist: \n",
- "llama_model_quantize_internal: model size = 12853.27 MB\n",
- "llama_model_quantize_internal: quant size = 3677.45 MB\n",
- "\n",
- "main: quantize time = 1089230.46 ms\n",
- "main: total time = 1089230.46 ms\n"
- ]
- }
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "## Run inference\n",
- "\n",
- "Here is a simple script to run your quantized models. I'm offloading every layer to the GPU (35 for a 7b parameter model) to speed up inference."
- ],
- "metadata": {
- "id": "WqI1CPiXI4dP"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "import os\n",
- "\n",
- "model_list = [file for file in os.listdir(MODEL_NAME) if \"gguf\" in file]\n",
- "\n",
- "prompt = input(\"Enter your prompt: \")\n",
- "chosen_method = input(\"Name of the model (options: \" + \", \".join(model_list) + \"): \")\n",
- "\n",
- "# Verify the chosen method is in the list\n",
- "if chosen_method not in model_list:\n",
- " print(\"Invalid name\")\n",
- "else:\n",
- " qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{method.upper()}.gguf\"\n",
- " !./llama.cpp/main -m {qtype} -n 128 --color -ngl 35 -p \"{prompt}\""
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "vNPL9WYg78l-",
- "outputId": "3c3e7d2f-f0de-429d-fd97-dab480bc514a"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Enter your prompt: prompt\n",
- "Please specify the quantization method to run the model (options: q4_k_s): q4_k_s\n",
- "main: build = 1100 (dd0dc36)\n",
- "main: seed = 1693227123\n",
- "ggml_init_cublas: found 1 CUDA devices:\n",
- " Device 0: Tesla T4, compute capability 7.5\n",
- "llama_model_loader: loaded meta data with 17 key-value pairs and 291 tensors from EvolCodeLlama-7b/evolcodellama-7b.gguf.q4_k_s.bin (version GGUF V2 (latest))\n",
- "llama_model_loader: - tensor 0: token_embd.weight q4_K [ 4096, 32016, 1, 1 ]\n",
- "llama_model_loader: - tensor 1: blk.0.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 2: blk.0.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 3: blk.0.attn_v.weight q5_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 4: blk.0.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 5: blk.0.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 6: blk.0.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 7: blk.0.ffn_down.weight q5_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 8: blk.0.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 9: blk.0.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 10: blk.1.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 11: blk.1.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 12: blk.1.attn_v.weight q5_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 13: blk.1.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 14: blk.1.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 15: blk.1.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 16: blk.1.ffn_down.weight q5_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 17: blk.1.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 18: blk.1.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 19: blk.2.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 20: blk.2.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 21: blk.2.attn_v.weight q5_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 22: blk.2.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 23: blk.2.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 24: blk.2.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 25: blk.2.ffn_down.weight q5_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 26: blk.2.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 27: blk.2.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 28: blk.3.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 29: blk.3.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 30: blk.3.attn_v.weight q5_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 31: blk.3.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 32: blk.3.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 33: blk.3.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 34: blk.3.ffn_down.weight q5_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 35: blk.3.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 36: blk.3.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 37: blk.4.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 38: blk.4.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 39: blk.4.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 40: blk.4.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 41: blk.4.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 42: blk.4.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 43: blk.4.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 44: blk.4.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 45: blk.4.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 46: blk.5.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 47: blk.5.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 48: blk.5.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 49: blk.5.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 50: blk.5.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 51: blk.5.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 52: blk.5.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 53: blk.5.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 54: blk.5.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 55: blk.6.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 56: blk.6.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 57: blk.6.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 58: blk.6.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 59: blk.6.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 60: blk.6.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 61: blk.6.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 62: blk.6.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 63: blk.6.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 64: blk.7.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 65: blk.7.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 66: blk.7.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 67: blk.7.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 68: blk.7.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 69: blk.7.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 70: blk.7.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 71: blk.7.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 72: blk.7.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 73: blk.8.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 74: blk.8.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 75: blk.8.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 76: blk.8.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 77: blk.8.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 78: blk.8.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 79: blk.8.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 80: blk.8.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 81: blk.8.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 82: blk.9.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 83: blk.9.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 84: blk.9.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 85: blk.9.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 86: blk.9.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 87: blk.9.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 88: blk.9.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 89: blk.9.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 90: blk.9.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 91: blk.10.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 92: blk.10.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 93: blk.10.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 94: blk.10.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 95: blk.10.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 96: blk.10.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 97: blk.10.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 98: blk.10.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 99: blk.10.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 100: blk.11.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 101: blk.11.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 102: blk.11.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 103: blk.11.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 104: blk.11.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 105: blk.11.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 106: blk.11.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 107: blk.11.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 108: blk.11.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 109: blk.12.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 110: blk.12.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 111: blk.12.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 112: blk.12.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 113: blk.12.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 114: blk.12.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 115: blk.12.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 116: blk.12.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 117: blk.12.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 118: blk.13.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 119: blk.13.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 120: blk.13.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 121: blk.13.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 122: blk.13.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 123: blk.13.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 124: blk.13.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 125: blk.13.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 126: blk.13.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 127: blk.14.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 128: blk.14.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 129: blk.14.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 130: blk.14.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 131: blk.14.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 132: blk.14.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 133: blk.14.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 134: blk.14.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 135: blk.14.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 136: blk.15.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 137: blk.15.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 138: blk.15.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 139: blk.15.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 140: blk.15.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 141: blk.15.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 142: blk.15.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 143: blk.15.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 144: blk.15.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 145: blk.16.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 146: blk.16.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 147: blk.16.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 148: blk.16.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 149: blk.16.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 150: blk.16.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 151: blk.16.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 152: blk.16.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 153: blk.16.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 154: blk.17.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 155: blk.17.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 156: blk.17.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 157: blk.17.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 158: blk.17.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 159: blk.17.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 160: blk.17.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 161: blk.17.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 162: blk.17.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 163: blk.18.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 164: blk.18.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 165: blk.18.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 166: blk.18.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 167: blk.18.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 168: blk.18.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 169: blk.18.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 170: blk.18.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 171: blk.18.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 172: blk.19.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 173: blk.19.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 174: blk.19.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 175: blk.19.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 176: blk.19.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 177: blk.19.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 178: blk.19.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 179: blk.19.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 180: blk.19.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 181: blk.20.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 182: blk.20.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 183: blk.20.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 184: blk.20.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 185: blk.20.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 186: blk.20.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 187: blk.20.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 188: blk.20.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 189: blk.20.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 190: blk.21.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 191: blk.21.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 192: blk.21.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 193: blk.21.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 194: blk.21.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 195: blk.21.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 196: blk.21.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 197: blk.21.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 198: blk.21.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 199: blk.22.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 200: blk.22.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 201: blk.22.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 202: blk.22.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 203: blk.22.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 204: blk.22.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 205: blk.22.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 206: blk.22.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 207: blk.22.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 208: blk.23.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 209: blk.23.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 210: blk.23.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 211: blk.23.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 212: blk.23.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 213: blk.23.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 214: blk.23.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 215: blk.23.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 216: blk.23.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 217: blk.24.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 218: blk.24.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 219: blk.24.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 220: blk.24.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 221: blk.24.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 222: blk.24.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 223: blk.24.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 224: blk.24.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 225: blk.24.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 226: blk.25.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 227: blk.25.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 228: blk.25.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 229: blk.25.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 230: blk.25.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 231: blk.25.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 232: blk.25.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 233: blk.25.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 234: blk.25.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 235: blk.26.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 236: blk.26.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 237: blk.26.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 238: blk.26.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 239: blk.26.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 240: blk.26.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 241: blk.26.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 242: blk.26.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 243: blk.26.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 244: blk.27.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 245: blk.27.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 246: blk.27.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 247: blk.27.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 248: blk.27.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 249: blk.27.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 250: blk.27.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 251: blk.27.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 252: blk.27.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 253: blk.28.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 254: blk.28.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 255: blk.28.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 256: blk.28.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 257: blk.28.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 258: blk.28.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 259: blk.28.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 260: blk.28.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 261: blk.28.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 262: blk.29.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 263: blk.29.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 264: blk.29.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 265: blk.29.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 266: blk.29.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 267: blk.29.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 268: blk.29.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 269: blk.29.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 270: blk.29.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 271: blk.30.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 272: blk.30.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 273: blk.30.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 274: blk.30.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 275: blk.30.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 276: blk.30.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 277: blk.30.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 278: blk.30.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 279: blk.30.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 280: blk.31.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 281: blk.31.attn_k.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 282: blk.31.attn_v.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 283: blk.31.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 284: blk.31.ffn_gate.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 285: blk.31.ffn_up.weight q4_K [ 4096, 11008, 1, 1 ]\n",
- "llama_model_loader: - tensor 286: blk.31.ffn_down.weight q4_K [ 11008, 4096, 1, 1 ]\n",
- "llama_model_loader: - tensor 287: blk.31.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 288: blk.31.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 289: output_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
- "llama_model_loader: - tensor 290: output.weight q6_K [ 4096, 32016, 1, 1 ]\n",
- "llama_model_loader: - kv 0: general.architecture str \n",
- "llama_model_loader: - kv 1: general.name str \n",
- "llama_model_loader: - kv 2: llama.context_length u32 \n",
- "llama_model_loader: - kv 3: llama.embedding_length u32 \n",
- "llama_model_loader: - kv 4: llama.block_count u32 \n",
- "llama_model_loader: - kv 5: llama.feed_forward_length u32 \n",
- "llama_model_loader: - kv 6: llama.rope.dimension_count u32 \n",
- "llama_model_loader: - kv 7: llama.attention.head_count u32 \n",
- "llama_model_loader: - kv 8: llama.attention.head_count_kv u32 \n",
- "llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 \n",
- "llama_model_loader: - kv 10: llama.rope.freq_base f32 \n",
- "llama_model_loader: - kv 11: general.file_type u32 \n",
- "llama_model_loader: - kv 12: tokenizer.ggml.model str \n",
- "llama_model_loader: - kv 13: tokenizer.ggml.tokens arr \n",
- "llama_model_loader: - kv 14: tokenizer.ggml.scores arr \n",
- "llama_model_loader: - kv 15: tokenizer.ggml.token_type arr \n",
- "llama_model_loader: - kv 16: general.quantization_version u32 \n",
- "llama_model_loader: - type f32: 65 tensors\n",
- "llama_model_loader: - type q4_K: 217 tensors\n",
- "llama_model_loader: - type q5_K: 8 tensors\n",
- "llama_model_loader: - type q6_K: 1 tensors\n",
- "llm_load_print_meta: format = GGUF V2 (latest)\n",
- "llm_load_print_meta: arch = llama\n",
- "llm_load_print_meta: vocab type = SPM\n",
- "llm_load_print_meta: n_vocab = 32016\n",
- "llm_load_print_meta: n_merges = 0\n",
- "llm_load_print_meta: n_ctx_train = 16384\n",
- "llm_load_print_meta: n_ctx = 512\n",
- "llm_load_print_meta: n_embd = 4096\n",
- "llm_load_print_meta: n_head = 32\n",
- "llm_load_print_meta: n_head_kv = 32\n",
- "llm_load_print_meta: n_layer = 32\n",
- "llm_load_print_meta: n_rot = 128\n",
- "llm_load_print_meta: n_gqa = 1\n",
- "llm_load_print_meta: f_norm_eps = 1.0e-05\n",
- "llm_load_print_meta: f_norm_rms_eps = 1.0e-05\n",
- "llm_load_print_meta: n_ff = 11008\n",
- "llm_load_print_meta: freq_base = 1000000.0\n",
- "llm_load_print_meta: freq_scale = 1\n",
- "llm_load_print_meta: model type = 7B\n",
- "llm_load_print_meta: model ftype = mostly Q4_K - Small\n",
- "llm_load_print_meta: model size = 6.74 B\n",
- "llm_load_print_meta: general.name = LLaMA\n",
- "llm_load_print_meta: BOS token = 1 '
'\n",
- "llm_load_print_meta: EOS token = 2 ''\n",
- "llm_load_print_meta: UNK token = 0 '
Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. "
- ]
- },
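- {
- "cell_type": "markdown",
- "source": [
- "If you prefer calling the quantized model from Python rather than the `main` binary, the `llama-cpp-python` bindings load the same GGUF files. The cell below is a minimal sketch, not part of the original run: the `model_path` assumes the `q4_k_s` file produced above, and GPU offloading only takes effect if the package was built with CUDA support."
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "source": [
- "# Minimal sketch using the llama-cpp-python bindings (illustrative, not from the original run)\n",
- "!pip install -q llama-cpp-python\n",
- "\n",
- "from llama_cpp import Llama\n",
- "\n",
- "llm = Llama(\n",
- "    model_path=f\"{MODEL_NAME}/{MODEL_NAME.lower()}.gguf.q4_k_s.bin\",  # adjust to your quantized file\n",
- "    n_gpu_layers=35,  # offload every layer, like -ngl 35 (requires a CUDA build)\n",
- ")\n",
- "\n",
- "output = llm(prompt, max_tokens=128)\n",
- "print(output[\"choices\"][0][\"text\"])"
- ],
- "metadata": {},
- "execution_count": null,
- "outputs": []
- },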
- {
- "cell_type": "markdown",
- "source": [
- "# ExLlamaV2: The Fastest Library to Run LLMs\n",
- "\n",
- "β€οΈ Created by [@maximelabonne](https://twitter.com/maximelabonne) as part of the π£οΈ [Large Language Model Course](https://github.com/mlabonne/llm-course)."
- ],
- "metadata": {
- "id": "QzUdY6GPiZXG"
- }
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "4niZ8igkiXZb"
- },
- "outputs": [],
- "source": [
- "# Install ExLLamaV2\n",
- "!git clone https://github.com/turboderp/exllamav2\n",
- "!pip install -e exllamav2"
- ]
- },
- {
- "cell_type": "code",
- "source": [
- "MODEL_NAME = \"zephyr-7b-beta\"\n",
- "BPW = 5.0\n",
- "\n",
- "# Download model\n",
- "!git lfs install\n",
- "!git clone https://huggingface.co/HuggingFaceH4/{MODEL_NAME}\n",
- "!mv {MODEL_NAME} base_model\n",
- "!rm base_mode/*.bin\n",
- "\n",
- "# Download dataset\n",
- "!wget https://huggingface.co/datasets/wikitext/resolve/9a9e482b5987f9d25b3a9b2883fc6cc9fd8071b3/wikitext-103-v1/wikitext-test.parquet"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "elHDPmXAil0c",
- "outputId": "5b732dac-fead-4ab2-f3e8-7b3622d3c690"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Git LFS initialized.\n",
- "Cloning into 'zephyr-7b-beta'...\n",
- "remote: Enumerating objects: 55, done.\u001b[K\n",
- "remote: Total 55 (delta 0), reused 0 (delta 0), pack-reused 55\u001b[K\n",
- "Unpacking objects: 100% (55/55), 534.67 KiB | 4.73 MiB/s, done.\n",
- "Filtering content: 100% (10/10), 13.48 GiB | 129.35 MiB/s, done.\n",
- "rm: cannot remove 'base_mode/*.bin': No such file or directory\n",
- "--2023-11-03 18:05:02-- https://huggingface.co/datasets/wikitext/resolve/9a9e482b5987f9d25b3a9b2883fc6cc9fd8071b3/wikitext-103-v1/wikitext-test.parquet\n",
- "Resolving huggingface.co (huggingface.co)... 65.8.178.27, 65.8.178.93, 65.8.178.118, ...\n",
- "Connecting to huggingface.co (huggingface.co)|65.8.178.27|:443... connected.\n",
- "HTTP request sent, awaiting response... 200 OK\n",
- "Length: 721735 (705K)\n",
- "Saving to: βwikitext-test.parquetβ\n",
- "\n",
- "wikitext-test.parqu 100%[===================>] 704.82K --.-KB/s in 0.1s \n",
- "\n",
- "2023-11-03 18:05:02 (5.57 MB/s) - βwikitext-test.parquetβ saved [721735/721735]\n",
- "\n"
- ]
- }
- ]
- },
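- {
- "cell_type": "markdown",
- "source": [
- "`convert.py` quantizes the weights to a target average of `BPW` bits per weight, using the wikitext parquet file as calibration data to measure the reconstruction error of each layer (the `rfn_error` values in the output below). As a rough size estimate that ignores overhead: a 7B-parameter model at 5.0 bpw takes about 7e9 × 5 / 8 ≈ 4.4 GB."
- ],
- "metadata": {}
- },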
- {
- "cell_type": "code",
- "source": [
- "# Quantize model\n",
- "!mkdir quant\n",
- "!python exllamav2/convert.py \\\n",
- " -i base_model \\\n",
- " -o quant \\\n",
- " -c wikitext-test.parquet \\\n",
- " -b {BPW}"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "jigQLg8Fis1f",
- "outputId": "5f2e97ee-f823-45f7-f39a-f4c6a774b587"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\u001b[1;30;43mStreaming output truncated to the last 5000 lines.\u001b[0m\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01444\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01401\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01332\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00876\n",
- " -- Time: 3.78 seconds\n",
- " -- Linear: model.layers.7.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.19280\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.17356\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.16492\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14247\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09048\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08221\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.10930\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.09792\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.09219\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07561\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07156\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05653\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04767\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04434\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04352\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02850\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02416\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02348\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02138\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02088\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01587\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01675\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01480\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01294\n",
- " -- Time: 6.21 seconds\n",
- " -- Layer: model.layers.7 (MLP)\n",
- " -- Linear: model.layers.7.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.17192\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.16163\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15815\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14380\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.08135\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07770\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.09087\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08363\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08210\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07280\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06958\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04646\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04014\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03912\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03888\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02327\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02036\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02012\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01866\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01851\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01246\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01264\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01212\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00882\n",
- " -- Time: 16.78 seconds\n",
- " -- Linear: model.layers.7.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.24041\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.22653\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.22191\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.20179\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11401\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10911\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12697\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11680\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11501\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.10219\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09774\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06494\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05600\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05474\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05444\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03253\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02831\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02799\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02593\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02574\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01738\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01727\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01695\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01176\n",
- " -- Time: 16.71 seconds\n",
- " -- Linear: model.layers.7.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.20930\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.18827\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.17987\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.16126\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09629\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.08799\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11395\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10350\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.09877\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08309\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07956\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05813\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04962\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04637\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.04557\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02919\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02440\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02402\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02185\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02135\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01579\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01587\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01471\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01128\n",
- " -- Time: 26.80 seconds\n",
- " -- Layer: model.layers.8 (Attention)\n",
- " -- Linear: model.layers.8.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.07321\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.06490\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.05988\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.05294\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.03374\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.02962\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.04266\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.03904\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.03463\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.02839\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.02717\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02166\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.01866\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.01627\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01567\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01085\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.00853\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.00809\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00748\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00708\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00570\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00571\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00489\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00384\n",
- " -- Time: 6.22 seconds\n",
- " -- Linear: model.layers.8.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.06478\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.05636\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.05113\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.04496\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.02945\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.02518\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.03836\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.03491\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.03047\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.02440\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.02347\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.01943\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.01666\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.01420\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01356\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.00974\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.00741\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.00695\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00641\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00598\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00506\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00499\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00419\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00326\n",
- " -- Time: 3.87 seconds\n",
- " -- Linear: model.layers.8.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.18482\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.16487\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15686\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13830\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.08576\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07784\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.10130\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.09213\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08768\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07196\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06795\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05156\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04405\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04111\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04040\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02577\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02116\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02051\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01835\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01788\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01333\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01308\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01217\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00845\n",
- " -- Time: 3.83 seconds\n",
- " -- Linear: model.layers.8.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.20113\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.18273\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.17538\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.15457\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09416\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08692\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11035\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10015\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.09581\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08035\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07590\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05670\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04819\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04548\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04482\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02840\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02389\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02333\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02116\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02074\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01511\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01540\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01422\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01094\n",
- " -- Time: 6.22 seconds\n",
- " -- Layer: model.layers.8 (MLP)\n",
- " -- Linear: model.layers.8.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.16797\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.15806\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15463\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14046\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07949\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07596\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.08895\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08186\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08023\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07119\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06813\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04545\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03924\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03820\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03795\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02274\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01983\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01959\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01817\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01801\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01210\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01225\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01176\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00845\n",
- " -- Time: 16.76 seconds\n",
- " -- Linear: model.layers.8.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23683\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.22306\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.21845\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.19867\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11225\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10738\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12523\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11508\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11321\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.10052\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09621\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06397\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05514\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05385\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05353\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03199\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02772\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02740\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02534\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02515\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01690\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01674\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01646\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01114\n",
- " -- Time: 16.72 seconds\n",
- " -- Linear: model.layers.8.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.21367\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.19327\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.18532\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.16520\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09845\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.09059\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11554\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10506\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10081\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08507\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08090\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05900\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05049\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04752\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.04679\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02977\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02519\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02484\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02262\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02216\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01638\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01655\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01541\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01205\n",
- " -- Time: 26.78 seconds\n",
- " -- Layer: model.layers.9 (Attention)\n",
- " -- Linear: model.layers.9.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.09374\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.08343\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.07735\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.06843\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04327\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03822\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.05469\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.04960\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04431\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03654\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03500\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02782\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02380\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02096\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02024\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01396\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01116\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01065\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00987\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00942\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00745\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00763\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00645\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00545\n",
- " -- Time: 6.22 seconds\n",
- " -- Linear: model.layers.9.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.08149\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.07163\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.06561\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.05786\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.03729\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03228\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.04811\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.04369\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.03838\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03122\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03000\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02446\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02089\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.01795\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01722\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01224\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.00937\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.00884\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00818\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00768\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00637\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00628\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00533\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00413\n",
- " -- Time: 3.79 seconds\n",
- " -- Linear: model.layers.9.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.19517\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.17711\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.17032\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.15017\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09130\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08444\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.10496\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.09629\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.09271\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07744\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07277\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05330\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04590\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04365\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04312\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02659\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02217\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02165\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01932\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01895\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01363\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01317\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01279\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00804\n",
- " -- Time: 3.80 seconds\n",
- " -- Linear: model.layers.9.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.20153\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.18239\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.17453\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.15316\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09457\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08706\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11063\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10074\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.09616\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07951\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07566\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05678\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04841\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04560\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04491\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02841\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02377\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02318\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02077\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02032\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01498\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01511\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01403\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01042\n",
- " -- Time: 6.20 seconds\n",
- " -- Layer: model.layers.9 (MLP)\n",
- " -- Linear: model.layers.9.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.16709\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.15677\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15316\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13897\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07902\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07526\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.08855\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08146\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.07979\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07048\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06740\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04528\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03912\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03803\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03776\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02269\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01986\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01961\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01817\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01801\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01216\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01244\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01179\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00879\n",
- " -- Time: 16.77 seconds\n",
- " -- Linear: model.layers.9.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23248\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21835\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.21353\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.19385\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10999\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10496\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12290\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11303\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11108\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09819\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09389\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06283\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05418\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05282\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05250\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03146\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02733\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02699\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02495\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02474\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01676\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01671\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01630\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01137\n",
- " -- Time: 16.69 seconds\n",
- " -- Linear: model.layers.9.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.22017\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.19862\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19033\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.16911\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10144\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.09320\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11938\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10841\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10391\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08730\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08272\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06067\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05196\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04883\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.04807\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03050\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02563\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02526\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02287\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02239\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01654\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01652\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01550\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01166\n",
- " -- Time: 26.75 seconds\n",
- " -- Layer: model.layers.10 (Attention)\n",
- " -- Linear: model.layers.10.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.08235\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.07363\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.06866\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.06023\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.03814\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03398\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.04728\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.04319\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.03894\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03206\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03048\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02402\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02064\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.01838\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01782\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01202\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.00962\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.00919\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00842\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00805\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00633\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00634\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00553\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00428\n",
- " -- Time: 6.20 seconds\n",
- " -- Linear: model.layers.10.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.07516\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.06653\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.06168\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.05393\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.03452\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03046\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.04334\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.03936\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.03536\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.02875\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.02734\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02195\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.01879\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.01658\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01603\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01097\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.00858\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.00817\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00742\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00705\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00568\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00556\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00487\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00357\n",
- " -- Time: 3.77 seconds\n",
- " -- Linear: model.layers.10.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.18187\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.16366\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15689\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13720\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.08496\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07789\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.09894\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08995\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08648\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07118\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06690\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05035\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04296\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04066\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04012\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02514\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02081\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02025\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01800\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01762\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01297\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01259\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01205\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00796\n",
- " -- Time: 3.83 seconds\n",
- " -- Linear: model.layers.10.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.19772\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.17857\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.17053\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14967\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09223\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08466\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.10913\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.09919\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.09394\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07816\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07389\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05624\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04783\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04464\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04390\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02824\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02368\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02304\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02093\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02045\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01527\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01561\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01425\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01133\n",
- " -- Time: 6.18 seconds\n",
- " -- Layer: model.layers.10 (MLP)\n",
- " -- Linear: model.layers.10.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.16458\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.15345\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.14950\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13525\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07763\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07360\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.08755\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08045\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.07854\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.06883\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06571\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04474\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03864\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03735\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03705\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02242\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01952\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01924\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01778\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01760\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01199\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01228\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01155\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00867\n",
- " -- Time: 16.72 seconds\n",
- " -- Linear: model.layers.10.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.22519\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21022\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.20510\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.18548\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10629\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10090\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11927\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10970\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10747\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09419\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08985\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06086\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05254\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05097\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05060\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03043\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02627\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02590\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02382\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02359\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01601\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01597\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01546\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01065\n",
- " -- Time: 16.67 seconds\n",
- " -- Linear: model.layers.10.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.23070\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20868\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.20041\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17776\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10675\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.09841\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12502\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11343\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10921\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09194\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08704\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06390\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05452\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05149\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.05076\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03217\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02720\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02684\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02433\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02387\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01749\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01771\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01646\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01279\n",
- " -- Time: 26.77 seconds\n",
- " -- Layer: model.layers.11 (Attention)\n",
- " -- Linear: model.layers.11.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.09637\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.08586\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.08008\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.07014\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04477\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03986\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.05541\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05047\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04572\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03744\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03559\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02819\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02421\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02166\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02103\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01414\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01146\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01098\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01004\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00963\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00750\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00769\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00661\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00542\n",
- " -- Time: 6.22 seconds\n",
- " -- Linear: model.layers.11.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.08114\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.07195\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.06606\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.05768\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.03737\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03267\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.04767\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.04342\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.03824\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03114\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.02972\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02425\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02075\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.01802\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01732\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01210\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.00943\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.00893\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00821\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00775\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00635\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00631\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00541\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00423\n",
- " -- Time: 3.82 seconds\n",
- " -- Linear: model.layers.11.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.19703\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.17551\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.16728\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14535\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09159\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08328\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.10811\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.09762\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.09342\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07590\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07111\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05511\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04670\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04391\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04323\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02758\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02246\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02183\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01924\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01879\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01423\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01368\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01300\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00864\n",
- " -- Time: 3.92 seconds\n",
- " -- Linear: model.layers.11.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.19597\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.17847\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.16379\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14511\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09237\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08126\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12329\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11030\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.09394\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07962\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07759\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06377\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05341\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04512\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04298\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03193\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02434\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02296\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02192\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02058\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01710\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01736\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01424\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01256\n",
- " -- Time: 6.20 seconds\n",
- " -- Layer: model.layers.11 (MLP)\n",
- " -- Linear: model.layers.11.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.16838\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.15674\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15241\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13774\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07955\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07520\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.09017\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08269\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08051\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07030\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06713\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04617\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03980\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03838\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03804\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02317\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02025\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01994\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01844\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01824\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01252\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01300\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01203\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00944\n",
- " -- Time: 16.79 seconds\n",
- " -- Linear: model.layers.11.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.22740\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21181\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.20640\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.18641\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10722\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10159\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12081\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11093\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10846\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09479\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09032\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06150\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05311\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05143\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05102\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03077\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02649\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02609\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02397\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02371\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01611\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01609\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01551\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01069\n",
- " -- Time: 16.77 seconds\n",
- " -- Linear: model.layers.11.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.22960\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20772\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19942\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17705\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10644\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.09805\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12519\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11322\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10894\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09175\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08694\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06400\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05447\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05138\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.05062\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03219\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02714\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02677\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02429\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02381\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01739\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01767\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01632\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01276\n",
- " -- Time: 26.84 seconds\n",
- " -- Layer: model.layers.12 (Attention)\n",
- " -- Linear: model.layers.12.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.10068\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.08938\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.08243\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.07225\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04673\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04101\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06037\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05396\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04786\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03913\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03765\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03088\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02602\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02269\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02185\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01557\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01216\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01156\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01071\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01017\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00838\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00843\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00714\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00606\n",
- " -- Time: 6.25 seconds\n",
- " -- Linear: model.layers.12.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.08574\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.07553\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.06922\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.06050\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.03941\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03421\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.05104\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.04590\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04042\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03277\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03136\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02597\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02200\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.01905\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01829\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01302\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01004\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.00951\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00876\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00826\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00686\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00683\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00579\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00469\n",
- " -- Time: 3.85 seconds\n",
- " -- Linear: model.layers.12.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.19504\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.17522\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.16722\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14655\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09135\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08336\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.10798\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.09762\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.09299\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07656\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07215\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05508\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04669\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04381\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04310\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02753\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02244\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02178\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01939\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01891\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01422\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01369\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01304\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00864\n",
- " -- Time: 3.84 seconds\n",
- " -- Linear: model.layers.12.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.21198\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.18789\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.17851\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.15663\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09908\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08925\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11771\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10642\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10141\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08235\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07812\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06074\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05148\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04812\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04730\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03056\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02559\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02480\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02233\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02180\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01657\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01693\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01547\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01239\n",
- " -- Time: 6.23 seconds\n",
- " -- Layer: model.layers.12 (MLP)\n",
- " -- Linear: model.layers.12.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.17317\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.16089\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15653\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14139\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.08192\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07739\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.09302\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08500\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08295\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07225\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06919\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04771\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04103\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03965\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03932\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02403\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02109\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02078\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01922\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01902\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01327\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01375\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01282\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01025\n",
- " -- Time: 16.80 seconds\n",
- " -- Linear: model.layers.12.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23561\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21922\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.21368\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.19274\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11136\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10544\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12512\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11497\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11266\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09816\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09356\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06397\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05511\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05348\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05310\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03199\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02766\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02727\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02501\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02476\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01693\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01695\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01637\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01149\n",
- " -- Time: 16.72 seconds\n",
- " -- Linear: model.layers.12.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.23220\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20919\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19921\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17615\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10738\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.09804\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12854\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11635\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.11008\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09202\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08705\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06542\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05593\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05194\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.05095\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03304\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02763\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02712\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02468\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02405\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01806\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01837\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01671\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01338\n",
- " -- Time: 26.78 seconds\n",
- " -- Layer: model.layers.13 (Attention)\n",
- " -- Linear: model.layers.13.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.10346\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.09299\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.08766\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.07699\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04820\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04354\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.05839\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05322\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04910\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04058\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03850\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02967\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02545\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02319\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02264\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01485\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01207\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01163\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01055\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01019\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00778\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00777\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00696\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00522\n",
- " -- Time: 6.21 seconds\n",
- " -- Linear: model.layers.13.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.08538\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.07664\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.07156\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.06280\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.03961\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03533\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.04917\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.04484\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04036\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03339\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03179\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02494\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02147\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.01905\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01847\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01246\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.00989\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.00945\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00865\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00825\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00651\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00643\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00569\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00422\n",
- " -- Time: 3.82 seconds\n",
- " -- Linear: model.layers.13.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.21169\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.19023\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.18235\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.15968\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09888\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09079\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11536\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10443\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10062\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08274\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07782\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05883\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04983\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04732\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04671\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02938\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02406\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02345\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02073\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02031\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01508\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01435\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01392\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00878\n",
- " -- Time: 3.81 seconds\n",
- " -- Linear: model.layers.13.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.22985\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20997\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.20194\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.18065\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10763\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09973\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12558\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11441\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10930\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09282\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08926\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06461\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05522\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05212\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05139\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03240\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02762\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02700\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02472\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02425\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01745\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01809\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01647\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01319\n",
- " -- Time: 6.19 seconds\n",
- " -- Layer: model.layers.13 (MLP)\n",
- " -- Linear: model.layers.13.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.17483\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.16251\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15795\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14266\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.08279\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07819\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.09385\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08610\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08379\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07304\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06979\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04815\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04151\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03999\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03964\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02414\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02113\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02080\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01922\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01901\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01303\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01362\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01251\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00993\n",
- " -- Time: 16.77 seconds\n",
- " -- Linear: model.layers.13.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23982\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.22323\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.21751\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.19635\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11347\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10742\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12767\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11723\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11476\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.10007\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09532\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06526\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05620\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05448\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05408\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03266\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02815\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02773\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02544\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02519\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01729\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01721\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01670\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01159\n",
- " -- Time: 16.72 seconds\n",
- " -- Linear: model.layers.13.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.24641\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.22109\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.21158\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.18667\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11395\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.10431\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13424\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.12162\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.11690\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09716\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09181\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06867\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05844\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05492\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.05408\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03458\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02889\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02847\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02562\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02507\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01873\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01869\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01756\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01328\n",
- " -- Time: 26.78 seconds\n",
- " -- Layer: model.layers.14 (Attention)\n",
- " -- Linear: model.layers.14.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.10582\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.09534\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.08908\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.07862\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04931\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04419\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06149\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05579\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05026\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04186\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04004\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03130\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02671\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02382\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02309\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01568\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01253\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01200\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01107\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01060\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00827\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00834\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00722\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00576\n",
- " -- Time: 6.21 seconds\n",
- " -- Linear: model.layers.14.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.08561\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.07738\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.07080\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.06264\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.03965\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03475\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.05197\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.04725\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04042\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03395\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03267\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02629\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02257\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.01913\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01824\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01316\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01000\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.00941\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00888\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00832\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00689\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00678\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00572\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00448\n",
- " -- Time: 3.83 seconds\n",
- " -- Linear: model.layers.14.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.22144\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.19968\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19127\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.16781\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10366\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09508\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12296\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11022\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10547\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08733\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08233\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06295\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05278\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04971\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04898\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03148\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02549\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02481\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02217\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02167\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01627\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01558\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01481\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00998\n",
- " -- Time: 3.84 seconds\n",
- " -- Linear: model.layers.14.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.24642\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21851\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.20690\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.18059\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11514\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10416\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13897\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.12539\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11840\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09537\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09121\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.07166\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.06063\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05589\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05472\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03587\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02969\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02881\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02585\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02511\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01920\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01975\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01767\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01432\n",
- " -- Time: 6.21 seconds\n",
- " -- Layer: model.layers.14 (MLP)\n",
- " -- Linear: model.layers.14.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.16493\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.15358\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.14900\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13483\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07826\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07376\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.08938\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08197\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.07921\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.06925\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06623\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04587\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03957\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03788\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03748\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02303\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02014\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01981\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01840\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01816\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01248\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01319\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01191\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00978\n",
- " -- Time: 16.75 seconds\n",
- " -- Linear: model.layers.14.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23642\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.22076\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.21511\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.19454\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11210\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10626\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12655\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11623\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11338\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09929\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09471\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06476\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05580\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05394\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05350\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03243\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02805\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02763\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02549\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02521\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01728\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01745\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01663\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01207\n",
- " -- Time: 16.72 seconds\n",
- " -- Linear: model.layers.14.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.23204\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20811\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19811\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17565\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10718\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.09761\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12842\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11596\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10998\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09171\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08705\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06573\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05587\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05206\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.05104\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03340\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02796\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02744\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02504\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02438\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01849\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01893\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01699\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01411\n",
- " -- Time: 26.80 seconds\n",
- " -- Layer: model.layers.15 (Attention)\n",
- " -- Linear: model.layers.15.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.11824\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.10740\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.10090\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.08950\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05527\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04995\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06887\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06209\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05624\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04743\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04546\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03518\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02984\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02678\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02602\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01771\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01425\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01369\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01273\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01224\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00947\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00962\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00832\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00688\n",
- " -- Time: 6.22 seconds\n",
- " -- Linear: model.layers.15.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.08997\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.08158\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.07426\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.06600\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04151\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03625\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.05554\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05021\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04234\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03593\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03491\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02827\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02404\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02007\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01901\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01418\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01052\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.00984\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00942\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00876\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00742\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00723\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00604\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00478\n",
- " -- Time: 3.78 seconds\n",
- " -- Linear: model.layers.15.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23763\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21808\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.21011\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.18801\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11228\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10438\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13130\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11890\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11398\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09722\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09255\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06716\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05694\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05385\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05308\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03358\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02751\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02687\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02442\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02393\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01729\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01660\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01602\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01042\n",
- " -- Time: 3.83 seconds\n",
- " -- Linear: model.layers.15.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.22281\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20108\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19287\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.16640\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10428\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09603\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12125\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10998\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10599\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08663\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08045\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06233\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05283\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05024\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04962\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03121\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02622\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02562\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02269\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02227\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01662\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01658\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01577\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01154\n",
- " -- Time: 6.20 seconds\n",
- " -- Layer: model.layers.15 (MLP)\n",
- " -- Linear: model.layers.15.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.16069\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.15021\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.14602\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13239\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07632\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07225\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.08692\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.07965\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.07721\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.06788\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06506\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04466\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03849\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03699\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03663\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02248\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01974\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01943\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01811\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01790\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01236\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01298\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01187\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00972\n",
- " -- Time: 16.77 seconds\n",
- " -- Linear: model.layers.15.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23888\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.22394\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.21869\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.19820\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11323\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10775\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12727\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11700\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11440\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.10084\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09639\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06496\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05602\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05432\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05392\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03248\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02794\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02754\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02541\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02515\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01710\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01689\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01651\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01114\n",
- " -- Time: 16.71 seconds\n",
- " -- Linear: model.layers.15.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.22929\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20544\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19581\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17408\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10567\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.09612\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12543\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11389\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10852\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09041\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08577\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06395\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05471\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05095\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.05003\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03227\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02688\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02643\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02392\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02334\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01763\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01757\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01639\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01254\n",
- " -- Time: 26.78 seconds\n",
- " -- Layer: model.layers.16 (Attention)\n",
- " -- Linear: model.layers.16.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.11746\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.10737\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.10179\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.09062\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05488\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.05019\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06660\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06065\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05572\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04742\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04533\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03389\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02900\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02641\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02577\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01698\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01376\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01328\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01228\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01187\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00893\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00889\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00799\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00601\n",
- " -- Time: 6.24 seconds\n",
- " -- Linear: model.layers.16.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.09286\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.08501\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.07839\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.07006\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04299\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03820\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.05563\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05094\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04370\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03752\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03612\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02812\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02428\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02069\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01978\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01404\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01075\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01012\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00966\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00904\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00733\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00717\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00613\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00456\n",
- " -- Time: 3.89 seconds\n",
- " -- Linear: model.layers.16.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.24342\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.22526\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.21816\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.19590\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11499\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10791\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13360\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.12081\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11661\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.10061\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09606\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06830\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05777\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05512\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05448\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03414\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02804\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02746\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02504\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02462\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01754\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01666\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01634\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01022\n",
- " -- Time: 3.85 seconds\n",
- " -- Linear: model.layers.16.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23516\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21166\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.20336\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.18091\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10959\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10060\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12675\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11556\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11176\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09287\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08821\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06515\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05577\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05310\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05242\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03287\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02825\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02758\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02502\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02460\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01800\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01859\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01714\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01375\n",
- " -- Time: 6.22 seconds\n",
- " -- Layer: model.layers.16 (MLP)\n",
- " -- Linear: model.layers.16.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.15193\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.14219\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.13810\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.12526\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07207\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.06821\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.08244\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.07556\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.07291\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.06421\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06152\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04227\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03647\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03490\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03452\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02124\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01855\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01824\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01703\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01680\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01156\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01215\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01102\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00899\n",
- " -- Time: 16.85 seconds\n",
- " -- Linear: model.layers.16.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23238\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21834\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.21318\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.19339\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11013\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10490\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12404\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11403\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11123\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09835\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09399\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06329\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05459\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05284\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05242\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03164\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02718\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02679\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02477\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02451\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01664\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01645\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01604\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01085\n",
- " -- Time: 16.73 seconds\n",
- " -- Linear: model.layers.16.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.22319\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.19859\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.18842\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.16736\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10245\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.09253\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12249\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11134\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10546\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08704\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08280\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06257\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05346\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04942\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.04840\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03161\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02606\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02557\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02307\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02243\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01725\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01710\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01591\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01215\n",
- " -- Time: 26.79 seconds\n",
- " -- Layer: model.layers.17 (Attention)\n",
- " -- Linear: model.layers.17.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.09829\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.09000\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.08431\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.07568\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04579\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04136\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.05741\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05237\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04662\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04001\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03857\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02920\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02505\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02210\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02135\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01463\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01162\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01111\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01049\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01001\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00768\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00777\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00663\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00532\n",
- " -- Time: 6.19 seconds\n",
- " -- Linear: model.layers.17.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.08335\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.07599\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.06954\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.06252\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.03844\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03384\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.05100\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.04629\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.03928\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03366\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03269\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02575\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02205\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.01855\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01764\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01288\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.00970\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.00911\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00875\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00817\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00672\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00661\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00549\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00433\n",
- " -- Time: 3.90 seconds\n",
- " -- Linear: model.layers.17.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.20309\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.18364\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.17341\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.15613\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09403\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08522\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11704\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10452\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.09631\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08153\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07840\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05969\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04993\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04516\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04397\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02979\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02319\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02230\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02063\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01985\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01531\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01458\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01324\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00917\n",
- " -- Time: 3.81 seconds\n",
- " -- Linear: model.layers.17.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.22796\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20518\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19632\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17325\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10651\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09758\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12522\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11352\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10867\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09042\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08499\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06454\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05483\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05163\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05087\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03239\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02747\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02676\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02437\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02388\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01756\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01816\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01653\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01336\n",
- " -- Time: 6.20 seconds\n",
- " -- Layer: model.layers.17 (MLP)\n",
- " -- Linear: model.layers.17.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.15907\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.14871\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.14446\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13126\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07530\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07126\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.08597\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.07891\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.07619\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.06721\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06452\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04402\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03805\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03643\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03605\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02213\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01933\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01901\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01776\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01752\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01206\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01261\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01151\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00929\n",
- " -- Time: 16.79 seconds\n",
- " -- Linear: model.layers.17.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.24069\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.22613\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.22082\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.20047\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11399\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10863\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12831\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11795\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11515\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.10196\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09775\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06561\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05659\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05480\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05438\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03293\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02847\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02808\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02606\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02580\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01763\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01763\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01703\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01214\n",
- " -- Time: 16.71 seconds\n",
- " -- Linear: model.layers.17.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.23679\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20960\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19847\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17539\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10882\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.09788\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13094\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11836\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.11219\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09183\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08713\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06688\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05691\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05259\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.05153\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03377\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02791\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02738\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02464\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02395\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01851\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01852\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01705\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01337\n",
- " -- Time: 26.73 seconds\n",
- " -- Layer: model.layers.18 (Attention)\n",
- " -- Linear: model.layers.18.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.11366\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.10467\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.09871\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.08852\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05320\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04850\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06644\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06008\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05405\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04669\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04498\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03393\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02889\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02578\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02500\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01706\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01370\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01318\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01243\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01196\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00911\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00927\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00797\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00663\n",
- " -- Time: 6.22 seconds\n",
- " -- Linear: model.layers.18.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.09304\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.08449\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.07639\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.06885\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04281\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03714\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.05762\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05275\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04376\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03751\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03660\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02914\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02510\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02066\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01948\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01457\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01083\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01007\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00979\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00904\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00763\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00751\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00612\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00488\n",
- " -- Time: 3.87 seconds\n",
- " -- Linear: model.layers.18.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.22252\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20193\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19171\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17346\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10408\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09482\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12835\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11432\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10636\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09040\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08736\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06581\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05469\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04999\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04883\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03301\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02564\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02473\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02284\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02208\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01701\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01596\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01485\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01001\n",
- " -- Time: 3.83 seconds\n",
- " -- Linear: model.layers.18.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.19772\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.17592\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.16816\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14555\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09180\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08353\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.10693\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.09711\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.09339\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07592\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07098\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05513\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04712\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04478\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04419\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02785\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02420\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02365\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02121\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02086\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01553\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01642\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01477\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01263\n",
- " -- Time: 6.20 seconds\n",
- " -- Layer: model.layers.18 (MLP)\n",
- " -- Linear: model.layers.18.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.14728\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.13762\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.13347\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.12128\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.06964\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.06577\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.07982\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.07330\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.07050\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.06210\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.05950\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04083\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03526\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03363\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03324\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02047\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01771\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01739\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01623\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01599\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01103\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01140\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01048\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00820\n",
- " -- Time: 16.83 seconds\n",
- " -- Linear: model.layers.18.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.22881\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21485\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.20956\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.19036\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10830\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10303\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12241\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11246\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10942\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09681\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09261\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06251\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05390\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05203\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05159\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03132\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02690\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02650\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02458\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02430\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01667\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01650\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01603\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01111\n",
- " -- Time: 16.73 seconds\n",
- " -- Linear: model.layers.18.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.22510\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.19820\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.18676\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.16481\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10319\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.09216\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12568\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11338\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10659\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08683\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08259\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06434\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05471\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05003\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.04886\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03262\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02680\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02623\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02369\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02295\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01807\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01818\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01649\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01341\n",
- " -- Time: 26.79 seconds\n",
- " -- Layer: model.layers.19 (Attention)\n",
- " -- Linear: model.layers.19.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.11644\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.10768\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.10152\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.09162\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05452\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04974\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06825\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06184\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05541\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04821\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04669\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03480\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02965\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02633\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02550\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01744\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01385\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01330\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01260\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01210\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00928\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00923\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00810\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00638\n",
- " -- Time: 6.23 seconds\n",
- " -- Linear: model.layers.19.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.09202\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.08449\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.07618\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.06889\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04248\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03691\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.05797\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05307\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04333\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03766\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03691\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02941\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02524\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02054\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01927\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01472\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01077\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.00997\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00982\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00903\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00773\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00752\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00611\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00488\n",
- " -- Time: 3.83 seconds\n",
- " -- Linear: model.layers.19.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23251\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21308\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.20404\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.18453\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10898\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10053\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13124\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11786\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11101\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09542\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09173\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06728\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05643\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05229\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05128\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03361\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02663\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02584\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02383\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02317\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01721\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01620\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01532\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00993\n",
- " -- Time: 3.84 seconds\n",
- " -- Linear: model.layers.19.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.21240\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.19376\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.18742\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.16420\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09921\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09250\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11509\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10345\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10047\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08449\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07993\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05928\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05036\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04845\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04797\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03000\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02635\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02590\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02359\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02329\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01684\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01795\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01624\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01403\n",
- " -- Time: 6.19 seconds\n",
- " -- Layer: model.layers.19 (MLP)\n",
- " -- Linear: model.layers.19.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.14325\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.13404\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.13019\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.11838\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.06772\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.06406\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.07722\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.07102\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.06852\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.06046\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.05793\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03943\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03415\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03266\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03230\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01978\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01714\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01685\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01571\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01549\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01061\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01094\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01011\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00779\n",
- " -- Time: 16.78 seconds\n",
- " -- Linear: model.layers.19.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.22417\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21056\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.20544\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.18681\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10609\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10102\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11969\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11008\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10719\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09500\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09090\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06112\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05278\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05099\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05057\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03066\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02648\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02611\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02426\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02399\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01647\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01641\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01587\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01129\n",
- " -- Time: 16.74 seconds\n",
- " -- Linear: model.layers.19.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.21211\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.18632\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.17439\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.15341\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09733\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.08616\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12003\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10839\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10058\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08150\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07764\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06150\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05253\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04730\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.04602\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03137\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02558\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02490\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02262\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02176\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01772\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01772\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01604\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01321\n",
- " -- Time: 26.78 seconds\n",
- " -- Layer: model.layers.20 (Attention)\n",
- " -- Linear: model.layers.20.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.11630\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.10727\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.09987\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.09052\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05425\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04880\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.07019\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06342\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05525\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04814\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04683\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03576\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03035\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02627\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02523\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01794\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01393\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01325\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01273\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01209\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00956\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00953\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00808\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00664\n",
- " -- Time: 6.23 seconds\n",
- " -- Linear: model.layers.20.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.09284\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.08449\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.07454\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.06771\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04260\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03601\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06061\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05522\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04370\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03771\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03725\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03066\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02624\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02060\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01910\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01531\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01089\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.00993\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00995\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00897\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00802\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00785\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00608\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00510\n",
- " -- Time: 3.83 seconds\n",
- " -- Linear: model.layers.20.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23120\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20926\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19857\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17941\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10756\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09769\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13221\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11821\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10996\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09330\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09015\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06796\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05649\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05160\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05039\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03402\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02643\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02547\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02351\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02271\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01746\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01641\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01515\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01021\n",
- " -- Time: 3.83 seconds\n",
- " -- Linear: model.layers.20.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23250\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20446\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19377\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.16236\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10847\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09762\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12756\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11605\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11059\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08639\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08040\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06563\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05604\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05256\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05170\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03302\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02801\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02722\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02378\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02324\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01788\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01861\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01677\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01376\n",
- " -- Time: 6.22 seconds\n",
- " -- Layer: model.layers.20 (MLP)\n",
- " -- Linear: model.layers.20.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.15034\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.14114\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.13758\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.12504\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07114\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.06767\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.08029\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.07404\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.07192\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.06368\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06092\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04098\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03555\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03425\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03395\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02052\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01788\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01761\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01639\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01620\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01092\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01124\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01049\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00788\n",
- " -- Time: 16.78 seconds\n",
- " -- Linear: model.layers.20.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23300\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21932\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.21445\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.19492\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11020\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10522\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12351\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11371\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11129\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09876\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09448\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06288\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05443\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05285\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05248\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03154\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02717\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02682\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02484\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02461\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01673\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01640\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01619\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01081\n",
- " -- Time: 16.73 seconds\n",
- " -- Linear: model.layers.20.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.21185\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.18673\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.17575\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.15506\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09714\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.08663\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11739\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10685\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10019\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08144\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07747\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06004\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05138\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04694\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.04584\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03042\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02490\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02435\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02188\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02117\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01677\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01660\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01531\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01191\n",
- " -- Time: 26.79 seconds\n",
- " -- Layer: model.layers.21 (Attention)\n",
- " -- Linear: model.layers.21.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.11127\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.10196\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.09483\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.08600\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05187\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04645\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06690\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06061\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05291\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04576\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04454\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03412\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02899\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02507\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02407\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01714\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01319\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01253\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01199\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01136\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00913\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00893\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00773\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00607\n",
- " -- Time: 6.20 seconds\n",
- " -- Linear: model.layers.21.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.09378\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.08510\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.07577\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.06884\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04326\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03671\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06057\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05497\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04434\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03813\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03759\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03076\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02632\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02092\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.01945\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01535\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01100\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01008\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.00999\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00908\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00801\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00781\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00614\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00506\n",
- " -- Time: 3.85 seconds\n",
- " -- Linear: model.layers.21.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.22163\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.19857\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.18638\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.16824\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10239\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09164\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12891\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11509\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10520\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08830\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08537\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06626\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05503\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04911\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04761\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03326\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02517\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02403\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02228\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02129\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01708\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01587\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01435\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00975\n",
- " -- Time: 3.84 seconds\n",
- " -- Linear: model.layers.21.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23350\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20365\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19298\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.16334\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10854\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09643\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12523\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11522\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11068\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08594\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07940\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06430\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05538\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05234\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05160\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03227\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02745\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02665\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02300\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02251\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01727\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01758\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01624\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01239\n",
- " -- Time: 6.21 seconds\n",
- " -- Layer: model.layers.21 (MLP)\n",
- " -- Linear: model.layers.21.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.14922\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.14035\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.13702\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.12456\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07072\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.06739\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.07943\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.07325\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.07139\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.06332\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06061\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04061\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03517\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03403\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03376\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02035\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01774\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01750\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01628\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01611\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01090\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01111\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01053\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00778\n",
- " -- Time: 16.78 seconds\n",
- " -- Linear: model.layers.21.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23998\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.22618\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.22144\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.20132\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11365\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10871\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12682\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11691\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11471\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.10196\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09745\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06472\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05598\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05453\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05419\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03238\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02806\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02771\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02568\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02546\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01715\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01695\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01667\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01124\n",
- " -- Time: 16.72 seconds\n",
- " -- Linear: model.layers.21.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.21060\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.18573\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.17462\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.15507\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09622\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.08574\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11685\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10651\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.09940\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08131\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07743\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05938\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05114\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04648\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.04530\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03010\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02458\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02399\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02170\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02094\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01659\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01635\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01509\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01158\n",
- " -- Time: 26.82 seconds\n",
- " -- Layer: model.layers.22 (Attention)\n",
- " -- Linear: model.layers.22.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.11209\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.10229\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.09493\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.08603\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05214\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04653\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06759\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06109\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05337\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04590\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04473\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03453\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02925\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02521\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02418\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01736\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01327\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01260\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01205\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01141\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00927\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00901\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00782\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00614\n",
- " -- Time: 6.22 seconds\n",
- " -- Linear: model.layers.22.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.10182\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.09235\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.08218\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.07468\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04701\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03994\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06553\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05968\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04823\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04141\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04076\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03335\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02857\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02278\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02124\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01667\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01206\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01106\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01095\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00998\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00877\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00861\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00679\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00569\n",
- " -- Time: 3.81 seconds\n",
- " -- Linear: model.layers.22.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.21928\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.19550\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.18235\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.16446\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10102\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08950\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12897\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11501\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10395\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08672\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08398\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06603\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05492\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04849\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04685\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03305\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02487\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02364\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02197\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02087\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01693\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01587\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01411\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00978\n",
- " -- Time: 3.84 seconds\n",
- " -- Linear: model.layers.22.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.21762\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.18789\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.17726\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.15261\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10171\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09076\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11871\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10860\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10392\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08076\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07427\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06082\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05235\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04925\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04849\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03053\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02610\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02537\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02207\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02155\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01645\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01714\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01544\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01247\n",
- " -- Time: 6.22 seconds\n",
- " -- Layer: model.layers.22 (MLP)\n",
- " -- Linear: model.layers.22.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.16317\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.15349\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.14991\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13635\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07727\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07367\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.08670\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.07996\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.07803\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.06923\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06627\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04433\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03836\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03715\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03686\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02217\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01930\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01903\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01769\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01751\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01182\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01197\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01143\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00826\n",
- " -- Time: 16.77 seconds\n",
- " -- Linear: model.layers.22.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.24401\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.22991\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.22512\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.20468\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11556\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.11050\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12880\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11884\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11660\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.10366\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09914\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06572\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05685\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05539\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05504\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03285\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02843\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02807\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02600\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02578\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01734\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01707\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01686\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01120\n",
- " -- Time: 16.74 seconds\n",
- " -- Linear: model.layers.22.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.21802\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.19351\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.18269\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.16234\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10003\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.08978\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12078\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10997\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10301\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08488\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08076\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06133\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05274\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04827\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.04717\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03104\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02547\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02492\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02254\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02183\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01701\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01682\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01556\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01191\n",
- " -- Time: 26.81 seconds\n",
- " -- Layer: model.layers.23 (Attention)\n",
- " -- Linear: model.layers.23.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.11380\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.10433\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.09742\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.08835\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05306\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04771\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06784\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06138\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05416\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04680\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04541\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03455\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02936\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02560\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02463\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01730\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01337\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01272\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01213\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01151\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00914\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00890\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00778\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00591\n",
- " -- Time: 6.21 seconds\n",
- " -- Linear: model.layers.23.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.10156\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.09196\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.08329\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.07560\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04688\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04059\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06309\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05749\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04804\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04111\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04020\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03200\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02746\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02265\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02140\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01605\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01187\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01105\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01073\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00993\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00838\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00821\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00671\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00536\n",
- " -- Time: 3.80 seconds\n",
- " -- Linear: model.layers.23.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23422\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21260\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.20214\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.18244\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10900\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09931\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13251\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11975\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11144\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09471\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09120\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06770\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05719\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05224\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05103\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03382\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02671\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02575\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02379\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02298\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01734\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01653\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01523\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01019\n",
- " -- Time: 3.88 seconds\n",
- " -- Linear: model.layers.23.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.21480\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.18771\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.17768\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14708\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09886\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08851\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11611\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10647\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10162\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07861\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07177\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05943\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05118\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04773\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04689\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02974\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02508\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02427\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02111\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02054\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01586\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01625\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01473\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01144\n",
- " -- Time: 6.19 seconds\n",
- " -- Layer: model.layers.23 (MLP)\n",
- " -- Linear: model.layers.23.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.17054\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.16037\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15662\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14237\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.08076\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07698\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.09058\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08349\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08155\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07227\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06921\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04622\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04005\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03883\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03853\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02320\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02017\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01990\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01847\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01829\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01246\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01250\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01206\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00863\n",
- " -- Time: 16.78 seconds\n",
- " -- Linear: model.layers.23.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.24915\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.23466\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.22974\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.20875\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11804\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.11278\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13139\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.12130\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11907\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.10571\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.10097\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06703\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05804\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05656\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05621\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03350\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02898\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02862\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02646\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02623\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01764\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01732\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01715\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01125\n",
- " -- Time: 16.71 seconds\n",
- " -- Linear: model.layers.23.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.22415\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.19883\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.18751\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.16688\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10277\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.09218\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12361\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11317\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10595\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08731\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08310\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06306\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05426\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04954\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.04837\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03177\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02603\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02545\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02301\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02226\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01716\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01705\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01560\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01188\n",
- " -- Time: 26.73 seconds\n",
- " -- Layer: model.layers.24 (Attention)\n",
- " -- Linear: model.layers.24.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.12260\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.11279\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.10554\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.09573\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05739\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.05178\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.07278\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06617\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05844\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.05073\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04924\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03712\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03172\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02773\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02670\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01860\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01456\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01389\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01324\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01261\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00981\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00976\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00839\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00663\n",
- " -- Time: 6.21 seconds\n",
- " -- Linear: model.layers.24.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.10946\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.10019\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.09057\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.08224\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05079\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04400\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06904\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06303\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05189\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04493\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04405\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03504\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03008\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02456\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02307\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01753\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01283\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01189\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01166\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01073\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00918\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00890\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00729\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00571\n",
- " -- Time: 3.80 seconds\n",
- " -- Linear: model.layers.24.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.24243\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.22225\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.21292\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.19248\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11347\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10475\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13544\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.12257\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11565\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09936\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09544\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06901\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05849\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05435\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05331\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03449\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02771\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02689\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02482\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02415\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01768\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01686\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01590\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01036\n",
- " -- Time: 3.81 seconds\n",
- " -- Linear: model.layers.24.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.18462\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.16712\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15897\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13305\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.08657\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07936\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.10264\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.09414\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08770\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07102\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06521\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05243\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04509\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04164\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04079\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02619\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02157\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02092\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01845\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01788\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01375\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01365\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01258\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00905\n",
- " -- Time: 6.20 seconds\n",
- " -- Layer: model.layers.24 (MLP)\n",
- " -- Linear: model.layers.24.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.17165\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.16135\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15763\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14326\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.08135\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07753\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.09113\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08407\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08214\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07274\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06955\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04653\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04032\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03910\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03881\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02330\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02027\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02000\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01854\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01836\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01242\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01249\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01202\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00855\n",
- " -- Time: 16.76 seconds\n",
- " -- Linear: model.layers.24.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.25261\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.23792\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.23302\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.21172\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11972\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.11447\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13373\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.12299\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.12081\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.10716\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.10251\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06826\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05892\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05744\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05708\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03423\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02952\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02916\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02696\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02674\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01830\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01776\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01782\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01172\n",
- " -- Time: 16.70 seconds\n",
- " -- Linear: model.layers.24.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.22909\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20351\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19234\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17107\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10504\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.09446\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12623\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11529\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10829\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08941\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08494\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06397\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05522\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05058\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.04942\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03230\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02647\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02589\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02340\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02265\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01751\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01719\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01599\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01182\n",
- " -- Time: 26.72 seconds\n",
- " -- Layer: model.layers.25 (Attention)\n",
- " -- Linear: model.layers.25.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.12900\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.11907\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.11197\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.10142\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.06049\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.05492\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.07604\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06910\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.06151\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.05347\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.05184\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03876\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03306\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02918\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02820\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01940\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01522\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01456\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01382\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01321\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01017\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01003\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00875\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00668\n",
- " -- Time: 6.20 seconds\n",
- " -- Linear: model.layers.25.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.11070\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.10151\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.09247\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.08384\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05146\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04495\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06873\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06295\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05250\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04551\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04442\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03481\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03008\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02483\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02343\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01742\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01292\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01203\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01174\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01086\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00904\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00883\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00729\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00564\n",
- " -- Time: 3.85 seconds\n",
- " -- Linear: model.layers.25.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.24470\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.22439\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.21520\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.19456\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11443\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10578\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13687\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.12336\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11654\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.10039\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09637\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06994\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05896\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05485\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05384\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03487\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02793\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02710\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02503\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02437\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01793\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01695\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01609\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01037\n",
- " -- Time: 3.80 seconds\n",
- " -- Linear: model.layers.25.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.19971\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.17495\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.16666\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13776\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09312\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08423\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.10770\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.09795\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.09494\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07392\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06606\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05482\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04697\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04471\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04418\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02750\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02313\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02253\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01925\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01888\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01463\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01430\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01390\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00958\n",
- " -- Time: 6.19 seconds\n",
- " -- Layer: model.layers.25 (MLP)\n",
- " -- Linear: model.layers.25.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.17795\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.16722\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.16331\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14839\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.08445\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08044\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.09471\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08726\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08527\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07546\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07213\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04844\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04190\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04062\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04032\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02428\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02113\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02084\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01933\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01914\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01304\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01312\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01263\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00910\n",
- " -- Time: 16.76 seconds\n",
- " -- Linear: model.layers.25.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.25596\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.24084\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.23583\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.21412\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.12140\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.11602\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13501\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.12474\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.12254\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.10858\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.10364\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06887\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05970\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05820\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05784\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03440\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02980\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02943\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02717\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02694\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01798\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01778\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01748\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01151\n",
- " -- Time: 16.68 seconds\n",
- " -- Linear: model.layers.25.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.23103\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20522\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19355\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17225\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10588\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.09500\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12759\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11703\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10922\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09041\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08588\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06501\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05611\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05101\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.04974\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03263\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02675\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02612\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02371\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02290\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01746\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01750\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01578\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01207\n",
- " -- Time: 26.74 seconds\n",
- " -- Layer: model.layers.26 (Attention)\n",
- " -- Linear: model.layers.26.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.12400\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.11394\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.10634\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.09626\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05808\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.05220\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.07396\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06728\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05918\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.05122\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04970\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03773\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03227\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02804\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02695\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01888\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01467\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01396\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01331\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01263\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00986\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00981\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00834\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00655\n",
- " -- Time: 6.20 seconds\n",
- " -- Linear: model.layers.26.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.10733\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.09786\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.08772\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.07956\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04976\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04267\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06874\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06260\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05088\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04381\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04301\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03479\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02981\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02406\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02250\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01739\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01260\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01160\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01143\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01045\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00913\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00881\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00717\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00566\n",
- " -- Time: 3.83 seconds\n",
- " -- Linear: model.layers.26.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23561\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21320\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.20178\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.18208\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10948\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09923\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13531\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.12136\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11215\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09498\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09165\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06941\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05795\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05253\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05117\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03472\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02681\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02576\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02380\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02292\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01782\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01662\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01538\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01014\n",
- " -- Time: 3.78 seconds\n",
- " -- Linear: model.layers.26.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.19595\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.17731\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.17136\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14596\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09234\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08536\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.10379\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.09552\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.09358\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07667\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06999\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05305\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04578\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04433\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04400\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02652\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02284\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02238\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01966\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01943\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01391\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01389\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01344\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00925\n",
- " -- Time: 6.19 seconds\n",
- " -- Layer: model.layers.26 (MLP)\n",
- " -- Linear: model.layers.26.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.18272\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.17161\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.16761\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.15209\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.08677\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08269\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.09719\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08969\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08762\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07741\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07390\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04969\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04302\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04170\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04140\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02485\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02162\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02132\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01974\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01954\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01320\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01332\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01277\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00911\n",
- " -- Time: 16.80 seconds\n",
- " -- Linear: model.layers.26.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.25765\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.24243\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.23726\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.21532\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.12240\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.11689\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13638\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.12577\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.12352\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.10932\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.10434\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06969\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.06023\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05870\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05834\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03484\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.03016\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02978\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02749\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02726\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01842\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01812\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01790\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01192\n",
- " -- Time: 16.72 seconds\n",
- " -- Linear: model.layers.26.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.23600\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21045\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19898\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17697\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10846\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.09767\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13012\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11931\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.11173\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09277\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08794\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06635\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05727\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05228\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.05105\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03353\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02746\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02683\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02439\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02359\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01825\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01798\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01663\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01248\n",
- " -- Time: 26.78 seconds\n",
- " -- Layer: model.layers.27 (Attention)\n",
- " -- Linear: model.layers.27.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.12800\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.11782\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.11131\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.10085\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.06014\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.05475\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.07450\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06772\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.06119\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.05302\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.05126\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03807\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03246\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02901\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02815\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01906\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01516\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01454\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01374\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01318\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01001\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00993\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00873\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00666\n",
- " -- Time: 6.21 seconds\n",
- " -- Linear: model.layers.27.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.11138\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.10262\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.09586\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.08676\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05208\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04693\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06588\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06038\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05305\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04602\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04448\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03345\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02885\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02511\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02417\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01676\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01307\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01244\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01185\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01125\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00880\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00864\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00750\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00566\n",
- " -- Time: 3.84 seconds\n",
- " -- Linear: model.layers.27.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23298\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21409\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.20526\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.18563\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10952\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10109\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12914\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11787\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11131\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09585\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09179\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06580\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05635\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05246\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05151\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03284\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02669\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02589\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02386\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02323\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01685\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01613\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01537\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00977\n",
- " -- Time: 3.82 seconds\n",
- " -- Linear: model.layers.27.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.21200\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.18792\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.17930\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.15069\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09955\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09040\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11465\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10507\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10141\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07998\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07405\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05895\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05073\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04826\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04768\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02975\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02572\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02513\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02197\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02157\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01632\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01694\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01555\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01257\n",
- " -- Time: 6.21 seconds\n",
- " -- Layer: model.layers.27 (MLP)\n",
- " -- Linear: model.layers.27.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.17764\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.16664\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.16263\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.14745\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.08444\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08030\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.09481\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08744\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08529\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07525\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07172\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04845\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04203\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04066\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04033\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02426\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02116\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02086\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01932\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01911\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01290\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01317\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01245\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00915\n",
- " -- Time: 16.77 seconds\n",
- " -- Linear: model.layers.27.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.25108\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.23599\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.23094\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.20941\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11934\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.11386\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13301\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.12278\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.12049\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.10647\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.10147\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06792\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05881\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05724\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05687\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03394\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02938\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02900\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02675\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02651\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01784\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01762\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01731\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01152\n",
- " -- Time: 16.71 seconds\n",
- " -- Linear: model.layers.27.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.23620\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20981\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19752\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17574\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10841\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.09708\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13220\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.12031\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.11186\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09251\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08790\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06722\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05766\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05227\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.05094\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03389\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02747\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02681\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02436\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02351\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01839\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01807\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01660\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01255\n",
- " -- Time: 26.76 seconds\n",
- " -- Layer: model.layers.28 (Attention)\n",
- " -- Linear: model.layers.28.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.11916\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.10948\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.10109\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.09155\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05581\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04952\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.07333\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06626\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05690\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04925\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04815\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03747\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03177\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02701\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02579\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01876\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01423\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01343\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01293\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01216\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00986\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00973\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00813\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00657\n",
- " -- Time: 6.20 seconds\n",
- " -- Linear: model.layers.28.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.10099\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.09191\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.08054\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.07318\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04673\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03898\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06745\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06117\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04785\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04130\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04084\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03410\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02918\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02265\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02082\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01709\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01194\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01081\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01087\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00973\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00890\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00864\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00663\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00555\n",
- " -- Time: 3.79 seconds\n",
- " -- Linear: model.layers.28.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23685\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21366\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.20104\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.18104\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11007\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09898\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13777\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.12351\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11284\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09523\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09198\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.07078\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05913\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05296\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05141\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03556\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02727\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02609\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02422\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02322\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01831\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01733\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01558\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01099\n",
- " -- Time: 3.80 seconds\n",
- " -- Linear: model.layers.28.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.22737\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.19411\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.18248\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.15765\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10530\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09273\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12337\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11270\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10799\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08227\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07847\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06327\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05415\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05081\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04999\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03169\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02673\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02590\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02225\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02169\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01706\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01731\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01599\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01227\n",
- " -- Time: 6.21 seconds\n",
- " -- Layer: model.layers.28 (MLP)\n",
- " -- Linear: model.layers.28.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.16706\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.15650\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15241\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13809\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07939\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07527\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.08959\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08263\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08023\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07062\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06740\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04581\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03969\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03821\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03787\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02293\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01987\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01956\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01811\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01789\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01220\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01237\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01171\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00853\n",
- " -- Time: 16.76 seconds\n",
- " -- Linear: model.layers.28.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23511\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.22073\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.21568\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.19541\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11175\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.10642\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12497\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11535\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11284\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09957\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09490\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06392\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05531\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05369\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05329\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03199\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02771\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02733\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02525\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02500\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01694\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01690\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01639\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01135\n",
- " -- Time: 16.70 seconds\n",
- " -- Linear: model.layers.28.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.21678\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.19135\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.17924\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.15888\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09953\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.08835\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12181\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.11122\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10275\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08420\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07999\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06226\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05358\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04823\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.04689\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03155\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02574\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02505\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02281\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02195\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01736\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01746\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01561\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01263\n",
- " -- Time: 26.80 seconds\n",
- " -- Layer: model.layers.29 (Attention)\n",
- " -- Linear: model.layers.29.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.11633\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.10687\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.09922\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.08996\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05443\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04862\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.07176\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06381\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05549\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04804\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04688\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03664\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03057\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02629\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02519\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01837\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01378\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01303\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01251\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01182\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00962\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00929\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00793\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00619\n",
- " -- Time: 6.20 seconds\n",
- " -- Linear: model.layers.29.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.10096\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.09321\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.08475\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.07676\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04705\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04115\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06402\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05806\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04792\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04176\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04090\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03252\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02767\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02274\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02141\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01626\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01190\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01105\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01087\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01002\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00850\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00824\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00673\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00531\n",
- " -- Time: 3.82 seconds\n",
- " -- Linear: model.layers.29.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.25490\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.23516\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.22557\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.20400\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.12012\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.11134\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.14274\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.12954\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.12215\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.10564\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.10147\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.07304\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.06200\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05764\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05657\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03666\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02949\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02865\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02653\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02583\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01884\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01809\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01694\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01133\n",
- " -- Time: 3.78 seconds\n",
- " -- Linear: model.layers.29.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.19766\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.16867\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15821\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13995\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09188\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.08069\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.10952\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.09814\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.09429\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07310\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07000\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05625\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04722\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04440\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04370\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02818\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02332\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02257\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01963\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01917\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01500\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01505\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01398\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01065\n",
- " -- Time: 6.22 seconds\n",
- " -- Layer: model.layers.29 (MLP)\n",
- " -- Linear: model.layers.29.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.16208\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.15172\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.14757\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13356\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07702\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07297\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.08709\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08036\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.07788\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.06844\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06527\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04455\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03862\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03711\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03676\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02231\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01938\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01907\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01768\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01745\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01190\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01221\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01140\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00855\n",
- " -- Time: 16.78 seconds\n",
- " -- Linear: model.layers.29.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.20992\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.19685\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19202\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17416\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10051\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09569\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.11278\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10406\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10156\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08960\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08555\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05828\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05090\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04933\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04898\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02937\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02721\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02688\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02521\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02500\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01658\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01893\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01610\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01526\n",
- " -- Time: 16.72 seconds\n",
- " -- Linear: model.layers.29.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.21573\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.19141\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.18033\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.15850\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.09927\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.08897\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.12029\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.10954\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.10214\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.08370\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.07890\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.06104\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05253\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04791\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.04676\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03064\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02528\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02470\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02226\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02151\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01645\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01670\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01491\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01178\n",
- " -- Time: 26.78 seconds\n",
- " -- Layer: model.layers.30 (Attention)\n",
- " -- Linear: model.layers.30.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.11347\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.10390\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.09477\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.08558\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05303\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04638\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.07177\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06453\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05413\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04664\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04581\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03667\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03100\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02572\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02433\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01841\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01363\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01274\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01238\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01151\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00972\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00955\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00781\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00648\n",
- " -- Time: 6.23 seconds\n",
- " -- Linear: model.layers.30.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.09913\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.09016\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.07836\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.07101\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04575\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03783\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06726\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06079\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04693\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04040\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04014\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03422\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02902\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02224\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02033\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01718\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01180\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01061\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01076\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00955\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00894\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00868\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00650\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00564\n",
- " -- Time: 3.82 seconds\n",
- " -- Linear: model.layers.30.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.23066\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.20681\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19312\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17402\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.10691\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09514\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.13683\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.12224\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.10968\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09215\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.08949\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.07042\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.05845\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05150\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.04971\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03542\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02662\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02530\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02364\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02248\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01832\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01728\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01517\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01106\n",
- " -- Time: 3.81 seconds\n",
- " -- Linear: model.layers.30.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.17670\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.15877\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15307\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13045\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.08273\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07668\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.09408\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08571\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08387\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.06803\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06273\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04834\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04136\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03999\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03966\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02425\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02117\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02080\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01832\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01811\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01313\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01369\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01271\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01006\n",
- " -- Time: 6.22 seconds\n",
- " -- Layer: model.layers.30 (MLP)\n",
- " -- Linear: model.layers.30.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.15936\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.14924\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.14528\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13162\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07624\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07227\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.08603\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.07933\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.07705\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.06784\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06488\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04448\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03879\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03740\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03707\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02246\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02056\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02028\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01901\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01882\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01267\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01425\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01223\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01139\n",
- " -- Time: 16.81 seconds\n",
- " -- Linear: model.layers.30.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.16603\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.15569\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15184\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13759\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07945\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.07553\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.08923\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.08227\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.08023\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07077\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06760\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04609\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04012\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03886\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03855\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02325\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02123\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02095\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01960\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01943\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01306\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01453\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01265\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01151\n",
- " -- Time: 16.72 seconds\n",
- " -- Linear: model.layers.30.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.18738\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.16703\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.15843\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.13782\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.08682\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.07855\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.10584\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.09386\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.08880\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.07284\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.06854\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.05334\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.04573\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.04253\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.04171\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02725\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02341\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.02301\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02086\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02037\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01563\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01646\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01464\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01297\n",
- " -- Time: 26.74 seconds\n",
- " -- Layer: model.layers.31 (Attention)\n",
- " -- Linear: model.layers.31.self_attn.q_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.10866\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.09916\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.09031\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.08169\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05073\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04421\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06838\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06180\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05183\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04453\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04367\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03496\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02965\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02457\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02324\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01751\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01299\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01213\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01178\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01095\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00921\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00908\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00739\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00612\n",
- " -- Time: 6.20 seconds\n",
- " -- Linear: model.layers.31.self_attn.k_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.09790\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.08884\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.07772\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.07047\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04527\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.03773\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06637\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05931\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04648\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.03986\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03951\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03362\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02834\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02199\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02022\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01685\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01166\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01057\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01061\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.00950\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00885\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00850\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00657\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00558\n",
- " -- Time: 3.81 seconds\n",
- " -- Linear: model.layers.31.self_attn.v_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.24299\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.21556\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.19947\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.17985\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.11267\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.09866\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.14473\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.13010\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.11611\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.09605\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.09349\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.07436\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.06221\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.05421\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.05216\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.03738\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.02799\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.02645\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.02463\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.02326\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01925\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01820\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01589\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01143\n",
- " -- Time: 3.81 seconds\n",
- " -- Linear: model.layers.31.self_attn.o_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.11279\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.09924\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.09478\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.08162\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05092\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04609\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.06104\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.05349\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.05188\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04118\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03942\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03045\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02751\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02643\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02621\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01618\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01648\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01625\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01499\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01487\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01008\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01328\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00977\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.01199\n",
- " -- Time: 6.19 seconds\n",
- " -- Layer: model.layers.31 (MLP)\n",
- " -- Linear: model.layers.31.mlp.gate_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.14756\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.13842\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.13503\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.12221\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.07013\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.06670\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.07889\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.07267\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.07083\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.06240\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.05951\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.04030\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03487\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.03371\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.03344\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.02017\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01744\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01718\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01588\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01570\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01063\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01072\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.01024\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00726\n",
- " -- Time: 16.81 seconds\n",
- " -- Linear: model.layers.31.mlp.up_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.19 bpw rfn_error: 0.09824\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.09194\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.08962\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.08108\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.04664\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.69 bpw rfn_error: 0.04430\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.05260\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.04838\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.19 bpw rfn_error: 0.04713\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.04143\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.03955\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.02695\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.02329\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02251\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.31 bpw rfn_error: 0.02232\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01354\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01183\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.38 bpw rfn_error: 0.01166\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01080\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01069\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.00736\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.00754\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.22 bpw rfn_error: 0.00711\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00542\n",
- " -- Time: 16.70 seconds\n",
- " -- Linear: model.layers.31.mlp.down_proj\n",
- " -- 0.05:3b/0.95:2b 32g s4 2.18 bpw rfn_error: 0.12848\n",
- " -- 0.25:3b/0.75:2b 32g s4 2.38 bpw rfn_error: 0.11652\n",
- " -- 0.25:4b/0.75:2b 32g s4 2.63 bpw rfn_error: 0.11088\n",
- " -- 0.1:4b/0.4:3b/0.5:2b 32g s4 2.72 bpw rfn_error: 0.09626\n",
- " -- 0.1:4b/0.9:3b 32g s4 3.22 bpw rfn_error: 0.05991\n",
- " -- 0.2:6b/0.8:3b 32g s4 3.71 bpw rfn_error: 0.05494\n",
- " -- 1.0:3b 128g s4 3.03 bpw rfn_error: 0.07339\n",
- " -- 1.0:3b 32g s4 3.13 bpw rfn_error: 0.06539\n",
- " -- 0.05:4b/0.95:3b 32g s4 3.18 bpw rfn_error: 0.06116\n",
- " -- 0.4:4b/0.6:3b 32g s4 3.53 bpw rfn_error: 0.05120\n",
- " -- 0.6:4b/0.4:3b 64g s4 3.66 bpw rfn_error: 0.04834\n",
- " -- 1.0:4b 128g s4 4.03 bpw rfn_error: 0.03777\n",
- " -- 1.0:4b 32g s4 4.13 bpw rfn_error: 0.03195\n",
- " -- 0.1:5b/0.9:4b 32g s4 4.22 bpw rfn_error: 0.02955\n",
- " -- 0.1:6b/0.9:4b 32g s4 4.32 bpw rfn_error: 0.02893\n",
- " -- 1.0:5b 128g s4 5.03 bpw rfn_error: 0.01943\n",
- " -- 0.1:6b/0.9:5b 32g s4 5.22 bpw rfn_error: 0.01655\n",
- " -- 0.05:8b/0.05:6b/0.9:5b 32g s4 5.34 bpw rfn_error: 0.01626\n",
- " -- 0.4:6b/0.6:5b 32g s4 5.53 bpw rfn_error: 0.01499\n",
- " -- 0.1:8b/0.3:6b/0.6:5b 32g s4 5.72 bpw rfn_error: 0.01465\n",
- " -- 1.0:6b 128g s4 6.03 bpw rfn_error: 0.01136\n",
- " -- 1.0:6b 32g s4 6.13 bpw rfn_error: 0.01197\n",
- " -- 0.1:8b/0.9:6b 128g s4 6.23 bpw rfn_error: 0.01058\n",
- " -- 1.0:8b 32g s4 8.13 bpw rfn_error: 0.00968\n",
- " -- Time: 26.78 seconds\n",
- " -- Layer: model.norm (RMSNorm)\n",
- " -- Layer: lm_head (Linear)\n",
- " -- Calibration perplexity (base): 6.5756\n",
- " -- Optimizing...\n",
- " -- rfn max: 0.12882 bpw: 2.89358\n",
- " -- rfn max: 0.06441 bpw: 3.83733\n",
- " -- rfn max: 0.03221 bpw: 4.87716\n",
- " -- rfn max: 0.01610 bpw: 6.44259\n",
- " -- rfn max: 0.02420 bpw: 5.31034\n",
- " -- rfn max: 0.02831 bpw: 4.98366\n",
- " -- rfn max: 0.02625 bpw: 5.11053\n",
- " -- rfn max: 0.02733 bpw: 5.03992\n",
- " -- rfn max: 0.02792 bpw: 5.00472\n",
- " -- rfn max: 0.02826 bpw: 4.98372\n",
- " -- rfn max: 0.02809 bpw: 4.99354\n",
- " -- rfn max: 0.02800 bpw: 4.99961\n",
- " -- rfn max: 0.02796 bpw: 5.00233\n",
- " -- rfn max: 0.02803 bpw: 4.99924\n",
- " -- rfn max: 0.02800 bpw: 4.99961\n",
- " -- rfn max: 0.02798 bpw: 5.00233\n",
- " -- rfn max: 0.02804 bpw: 4.99880\n",
- " -- rfn max: 0.02801 bpw: 4.99961\n",
- " -- rfn max: 0.02799 bpw: 5.00093\n",
- " -- rfn max: 0.02805 bpw: 4.99880\n",
- " -- rfn max: 0.02802 bpw: 4.99924\n",
- " -- rfn max: 0.02801 bpw: 4.99961\n",
- " -- rfn max: 0.02800 bpw: 4.99961\n",
- " -- rfn min: 0.01400 bpw: 5.89112\n",
- " -- rfn min: 0.02100 bpw: 5.27758\n",
- " -- rfn min: 0.02450 bpw: 5.11620\n",
- " -- rfn min: 0.02625 bpw: 5.03063\n",
- " -- rfn min: 0.02713 bpw: 5.00971\n",
- " -- rfn min: 0.02756 bpw: 4.99961\n",
- " -- rfn min: 0.02734 bpw: 5.00551\n",
- " -- rfn min: 0.02745 bpw: 5.00093\n",
- " -- rfn min: 0.02751 bpw: 5.00093\n",
- " -- rfn min: 0.02754 bpw: 5.00093\n",
- " -- rfn min: 0.02755 bpw: 4.99961\n",
- " -- rfn min: 0.02754 bpw: 4.99961\n",
- " -- Tokenizing samples...\n",
- " -- First 50 tokens of dataset:\n",
- " ' = Robert Boulter = \\n Robert Boulter is an English film , television and theatre actor . He had a guest @-@ starring role on the television series The Bill in 2000 . This was followed'\n",
- " -- Last 50 tokens of dataset:\n",
- " '] more meaningful lives \" . The film argues the case against conformity , but does not deny that people need and want it ; even the gay characters just want to fit in . Jim and Jim , the Burnhams \\' other neighbors , are'\n",
- " -- Token embeddings again...\n",
- " -- Quantizing...\n",
- " -- Layer: model.layers.0 (Attention)\n",
- " -- Linear: model.layers.0.self_attn.q_proj -> 0.05:3b/0.95:2b 32g s4, 2.19 bpw\n",
- " -- Linear: model.layers.0.self_attn.k_proj -> 0.05:3b/0.95:2b 32g s4, 2.19 bpw\n",
- " -- Linear: model.layers.0.self_attn.v_proj -> 0.2:6b/0.8:3b 32g s4, 3.69 bpw\n",
- " -- Linear: model.layers.0.self_attn.o_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Layer rfn_error: 0.022664\n",
- " -- Module quantized, time: 15.82 seconds\n",
- " -- Layer: model.layers.0 (MLP)\n",
- " -- Linear: model.layers.0.mlp.gate_proj -> 1.0:4b 32g s4, 4.13 bpw\n",
- " -- Linear: model.layers.0.mlp.up_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.0.mlp.down_proj -> 0.6:4b/0.4:3b 64g s4, 3.66 bpw\n",
- " -- Layer rfn_error: 0.030517\n",
- " -- Module quantized, time: 39.55 seconds\n",
- " -- Layer: model.layers.1 (Attention)\n",
- " -- Linear: model.layers.1.self_attn.q_proj -> 0.05:3b/0.95:2b 32g s4, 2.19 bpw\n",
- " -- Linear: model.layers.1.self_attn.k_proj -> 0.05:3b/0.95:2b 32g s4, 2.19 bpw\n",
- " -- Linear: model.layers.1.self_attn.v_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.1.self_attn.o_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Layer rfn_error: 0.027920\n",
- " -- Module quantized, time: 15.11 seconds\n",
- " -- Layer: model.layers.1 (MLP)\n",
- " -- Linear: model.layers.1.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.1.mlp.up_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.1.mlp.down_proj -> 0.05:3b/0.95:2b 32g s4, 2.18 bpw\n",
- " -- Layer rfn_error: 0.019765\n",
- " -- Module quantized, time: 38.87 seconds\n",
- " -- Layer: model.layers.2 (Attention)\n",
- " -- Linear: model.layers.2.self_attn.q_proj -> 0.05:4b/0.95:3b 32g s4, 3.19 bpw\n",
- " -- Linear: model.layers.2.self_attn.k_proj -> 0.05:4b/0.95:3b 32g s4, 3.19 bpw\n",
- " -- Linear: model.layers.2.self_attn.v_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.2.self_attn.o_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Layer rfn_error: 0.001632\n",
- " -- Module quantized, time: 15.01 seconds\n",
- " -- Layer: model.layers.2 (MLP)\n",
- " -- Linear: model.layers.2.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.2.mlp.up_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.2.mlp.down_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Layer rfn_error: 0.001273\n",
- " -- Module quantized, time: 40.94 seconds\n",
- " -- Layer: model.layers.3 (Attention)\n",
- " -- Linear: model.layers.3.self_attn.q_proj -> 1.0:3b 128g s4, 3.03 bpw\n",
- " -- Linear: model.layers.3.self_attn.k_proj -> 0.1:4b/0.4:3b/0.5:2b 32g s4, 2.72 bpw\n",
- " -- Linear: model.layers.3.self_attn.v_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.3.self_attn.o_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Layer rfn_error: 0.002017\n",
- " -- Module quantized, time: 14.93 seconds\n",
- " -- Layer: model.layers.3 (MLP)\n",
- " -- Linear: model.layers.3.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.3.mlp.up_proj -> 0.4:6b/0.6:5b 32g s4, 5.53 bpw\n",
- " -- Linear: model.layers.3.mlp.down_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Layer rfn_error: 0.001798\n",
- " -- Module quantized, time: 39.25 seconds\n",
- " -- Layer: model.layers.4 (Attention)\n",
- " -- Linear: model.layers.4.self_attn.q_proj -> 0.1:4b/0.9:3b 32g s4, 3.22 bpw\n",
- " -- Linear: model.layers.4.self_attn.k_proj -> 0.05:4b/0.95:3b 32g s4, 3.19 bpw\n",
- " -- Linear: model.layers.4.self_attn.v_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.4.self_attn.o_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Layer rfn_error: 0.002456\n",
- " -- Module quantized, time: 15.05 seconds\n",
- " -- Layer: model.layers.4 (MLP)\n",
- " -- Linear: model.layers.4.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.4.mlp.up_proj -> 0.4:6b/0.6:5b 32g s4, 5.53 bpw\n",
- " -- Linear: model.layers.4.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.002233\n",
- " -- Module quantized, time: 39.20 seconds\n",
- " -- Layer: model.layers.5 (Attention)\n",
- " -- Linear: model.layers.5.self_attn.q_proj -> 0.4:4b/0.6:3b 32g s4, 3.53 bpw\n",
- " -- Linear: model.layers.5.self_attn.k_proj -> 0.4:4b/0.6:3b 32g s4, 3.53 bpw\n",
- " -- Linear: model.layers.5.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.5.self_attn.o_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Layer rfn_error: 0.002432\n",
- " -- Module quantized, time: 15.09 seconds\n",
- " -- Layer: model.layers.5 (MLP)\n",
- " -- Linear: model.layers.5.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.5.mlp.up_proj -> 0.05:8b/0.05:6b/0.9:5b 32g s4, 5.38 bpw\n",
- " -- Linear: model.layers.5.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.003064\n",
- " -- Module quantized, time: 40.26 seconds\n",
- " -- Layer: model.layers.6 (Attention)\n",
- " -- Linear: model.layers.6.self_attn.q_proj -> 0.4:4b/0.6:3b 32g s4, 3.53 bpw\n",
- " -- Linear: model.layers.6.self_attn.k_proj -> 0.1:4b/0.9:3b 32g s4, 3.22 bpw\n",
- " -- Linear: model.layers.6.self_attn.v_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.6.self_attn.o_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Layer rfn_error: 0.003424\n",
- " -- Module quantized, time: 15.77 seconds\n",
- " -- Layer: model.layers.6 (MLP)\n",
- " -- Linear: model.layers.6.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.6.mlp.up_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.6.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.003627\n",
- " -- Module quantized, time: 39.28 seconds\n",
- " -- Layer: model.layers.7 (Attention)\n",
- " -- Linear: model.layers.7.self_attn.q_proj -> 1.0:4b 128g s4, 4.03 bpw\n",
- " -- Linear: model.layers.7.self_attn.k_proj -> 0.4:4b/0.6:3b 32g s4, 3.53 bpw\n",
- " -- Linear: model.layers.7.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.7.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.003550\n",
- " -- Module quantized, time: 14.95 seconds\n",
- " -- Layer: model.layers.7 (MLP)\n",
- " -- Linear: model.layers.7.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.7.mlp.up_proj -> 0.05:8b/0.05:6b/0.9:5b 32g s4, 5.38 bpw\n",
- " -- Linear: model.layers.7.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.004240\n",
- " -- Module quantized, time: 39.07 seconds\n",
- " -- Layer: model.layers.8 (Attention)\n",
- " -- Linear: model.layers.8.self_attn.q_proj -> 0.6:4b/0.4:3b 64g s4, 3.66 bpw\n",
- " -- Linear: model.layers.8.self_attn.k_proj -> 0.4:4b/0.6:3b 32g s4, 3.53 bpw\n",
- " -- Linear: model.layers.8.self_attn.v_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.8.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.004491\n",
- " -- Module quantized, time: 14.88 seconds\n",
- " -- Layer: model.layers.8 (MLP)\n",
- " -- Linear: model.layers.8.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.8.mlp.up_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.8.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.004691\n",
- " -- Module quantized, time: 41.15 seconds\n",
- " -- Layer: model.layers.9 (Attention)\n",
- " -- Linear: model.layers.9.self_attn.q_proj -> 1.0:4b 128g s4, 4.03 bpw\n",
- " -- Linear: model.layers.9.self_attn.k_proj -> 1.0:4b 128g s4, 4.03 bpw\n",
- " -- Linear: model.layers.9.self_attn.v_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.9.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.005494\n",
- " -- Module quantized, time: 14.82 seconds\n",
- " -- Layer: model.layers.9 (MLP)\n",
- " -- Linear: model.layers.9.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.9.mlp.up_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.9.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.005163\n",
- " -- Module quantized, time: 39.37 seconds\n",
- " -- Layer: model.layers.10 (Attention)\n",
- " -- Linear: model.layers.10.self_attn.q_proj -> 1.0:4b 128g s4, 4.03 bpw\n",
- " -- Linear: model.layers.10.self_attn.k_proj -> 0.6:4b/0.4:3b 64g s4, 3.66 bpw\n",
- " -- Linear: model.layers.10.self_attn.v_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.10.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.006206\n",
- " -- Module quantized, time: 14.98 seconds\n",
- " -- Layer: model.layers.10 (MLP)\n",
- " -- Linear: model.layers.10.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.10.mlp.up_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.10.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.005504\n",
- " -- Module quantized, time: 41.28 seconds\n",
- " -- Layer: model.layers.11 (Attention)\n",
- " -- Linear: model.layers.11.self_attn.q_proj -> 1.0:4b 32g s4, 4.13 bpw\n",
- " -- Linear: model.layers.11.self_attn.k_proj -> 1.0:4b 128g s4, 4.03 bpw\n",
- " -- Linear: model.layers.11.self_attn.v_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.11.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.007919\n",
- " -- Module quantized, time: 15.37 seconds\n",
- " -- Layer: model.layers.11 (MLP)\n",
- " -- Linear: model.layers.11.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.11.mlp.up_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.11.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.005873\n",
- " -- Module quantized, time: 39.38 seconds\n",
- " -- Layer: model.layers.12 (Attention)\n",
- " -- Linear: model.layers.12.self_attn.q_proj -> 1.0:4b 32g s4, 4.13 bpw\n",
- " -- Linear: model.layers.12.self_attn.k_proj -> 1.0:4b 128g s4, 4.03 bpw\n",
- " -- Linear: model.layers.12.self_attn.v_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.12.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.009183\n",
- " -- Module quantized, time: 15.00 seconds\n",
- " -- Layer: model.layers.12 (MLP)\n",
- " -- Linear: model.layers.12.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.12.mlp.up_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.12.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.006568\n",
- " -- Module quantized, time: 39.22 seconds\n",
- " -- Layer: model.layers.13 (Attention)\n",
- " -- Linear: model.layers.13.self_attn.q_proj -> 1.0:4b 32g s4, 4.13 bpw\n",
- " -- Linear: model.layers.13.self_attn.k_proj -> 1.0:4b 128g s4, 4.03 bpw\n",
- " -- Linear: model.layers.13.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.13.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.007230\n",
- " -- Module quantized, time: 15.30 seconds\n",
- " -- Layer: model.layers.13 (MLP)\n",
- " -- Linear: model.layers.13.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.13.mlp.up_proj -> 0.05:8b/0.05:6b/0.9:5b 32g s4, 5.38 bpw\n",
- " -- Linear: model.layers.13.mlp.down_proj -> 0.4:6b/0.6:5b 32g s4, 5.53 bpw\n",
- " -- Layer rfn_error: 0.006850\n",
- " -- Module quantized, time: 39.51 seconds\n",
- " -- Layer: model.layers.14 (Attention)\n",
- " -- Linear: model.layers.14.self_attn.q_proj -> 1.0:4b 32g s4, 4.13 bpw\n",
- " -- Linear: model.layers.14.self_attn.k_proj -> 1.0:4b 128g s4, 4.03 bpw\n",
- " -- Linear: model.layers.14.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.14.self_attn.o_proj -> 0.4:6b/0.6:5b 32g s4, 5.53 bpw\n",
- " -- Layer rfn_error: 0.008164\n",
- " -- Module quantized, time: 15.19 seconds\n",
- " -- Layer: model.layers.14 (MLP)\n",
- " -- Linear: model.layers.14.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.14.mlp.up_proj -> 0.05:8b/0.05:6b/0.9:5b 32g s4, 5.38 bpw\n",
- " -- Linear: model.layers.14.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.007977\n",
- " -- Module quantized, time: 39.93 seconds\n",
- " -- Layer: model.layers.15 (Attention)\n",
- " -- Linear: model.layers.15.self_attn.q_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.15.self_attn.k_proj -> 1.0:4b 32g s4, 4.13 bpw\n",
- " -- Linear: model.layers.15.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.15.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.009676\n",
- " -- Module quantized, time: 15.08 seconds\n",
- " -- Layer: model.layers.15 (MLP)\n",
- " -- Linear: model.layers.15.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.15.mlp.up_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.15.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.008644\n",
- " -- Module quantized, time: 41.41 seconds\n",
- " -- Layer: model.layers.16 (Attention)\n",
- " -- Linear: model.layers.16.self_attn.q_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.16.self_attn.k_proj -> 1.0:4b 32g s4, 4.13 bpw\n",
- " -- Linear: model.layers.16.self_attn.v_proj -> 0.05:8b/0.05:6b/0.9:5b 32g s4, 5.38 bpw\n",
- " -- Linear: model.layers.16.self_attn.o_proj -> 0.05:8b/0.05:6b/0.9:5b 32g s4, 5.38 bpw\n",
- " -- Layer rfn_error: 0.008491\n",
- " -- Module quantized, time: 17.31 seconds\n",
- " -- Layer: model.layers.16 (MLP)\n",
- " -- Linear: model.layers.16.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.16.mlp.up_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.16.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.009762\n",
- " -- Module quantized, time: 39.63 seconds\n",
- " -- Layer: model.layers.17 (Attention)\n",
- " -- Linear: model.layers.17.self_attn.q_proj -> 1.0:4b 32g s4, 4.13 bpw\n",
- " -- Linear: model.layers.17.self_attn.k_proj -> 1.0:4b 128g s4, 4.03 bpw\n",
- " -- Linear: model.layers.17.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.17.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.010878\n",
- " -- Module quantized, time: 15.10 seconds\n",
- " -- Layer: model.layers.17 (MLP)\n",
- " -- Linear: model.layers.17.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.17.mlp.up_proj -> 0.4:6b/0.6:5b 32g s4, 5.53 bpw\n",
- " -- Linear: model.layers.17.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.010737\n",
- " -- Module quantized, time: 41.41 seconds\n",
- " -- Layer: model.layers.18 (Attention)\n",
- " -- Linear: model.layers.18.self_attn.q_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.18.self_attn.k_proj -> 1.0:4b 32g s4, 4.13 bpw\n",
- " -- Linear: model.layers.18.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.18.self_attn.o_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Layer rfn_error: 0.010465\n",
- " -- Module quantized, time: 15.01 seconds\n",
- " -- Layer: model.layers.18 (MLP)\n",
- " -- Linear: model.layers.18.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.18.mlp.up_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.18.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.012024\n",
- " -- Module quantized, time: 39.66 seconds\n",
- " -- Layer: model.layers.19 (Attention)\n",
- " -- Linear: model.layers.19.self_attn.q_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.19.self_attn.k_proj -> 1.0:4b 32g s4, 4.13 bpw\n",
- " -- Linear: model.layers.19.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.19.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.012131\n",
- " -- Module quantized, time: 15.02 seconds\n",
- " -- Layer: model.layers.19 (MLP)\n",
- " -- Linear: model.layers.19.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.19.mlp.up_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.19.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.012679\n",
- " -- Module quantized, time: 39.46 seconds\n",
- " -- Layer: model.layers.20 (Attention)\n",
- " -- Linear: model.layers.20.self_attn.q_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.20.self_attn.k_proj -> 1.0:4b 32g s4, 4.13 bpw\n",
- " -- Linear: model.layers.20.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.20.self_attn.o_proj -> 0.05:8b/0.05:6b/0.9:5b 32g s4, 5.38 bpw\n",
- " -- Layer rfn_error: 0.009064\n",
- " -- Module quantized, time: 15.09 seconds\n",
- " -- Layer: model.layers.20 (MLP)\n",
- " -- Linear: model.layers.20.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.20.mlp.up_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.20.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.012641\n",
- " -- Module quantized, time: 39.37 seconds\n",
- " -- Layer: model.layers.21 (Attention)\n",
- " -- Linear: model.layers.21.self_attn.q_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.21.self_attn.k_proj -> 1.0:4b 32g s4, 4.13 bpw\n",
- " -- Linear: model.layers.21.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.21.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.008775\n",
- " -- Module quantized, time: 15.28 seconds\n",
- " -- Layer: model.layers.21 (MLP)\n",
- " -- Linear: model.layers.21.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.21.mlp.up_proj -> 0.05:8b/0.05:6b/0.9:5b 32g s4, 5.38 bpw\n",
- " -- Linear: model.layers.21.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.012627\n",
- " -- Module quantized, time: 40.02 seconds\n",
- " -- Layer: model.layers.22 (Attention)\n",
- " -- Linear: model.layers.22.self_attn.q_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.22.self_attn.k_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.22.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.22.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.005946\n",
- " -- Module quantized, time: 15.25 seconds\n",
- " -- Layer: model.layers.22 (MLP)\n",
- " -- Linear: model.layers.22.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.22.mlp.up_proj -> 0.4:6b/0.6:5b 32g s4, 5.53 bpw\n",
- " -- Linear: model.layers.22.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.012623\n",
- " -- Module quantized, time: 41.70 seconds\n",
- " -- Layer: model.layers.23 (Attention)\n",
- " -- Linear: model.layers.23.self_attn.q_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.23.self_attn.k_proj -> 1.0:4b 32g s4, 4.13 bpw\n",
- " -- Linear: model.layers.23.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.23.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.007499\n",
- " -- Module quantized, time: 16.99 seconds\n",
- " -- Layer: model.layers.23 (MLP)\n",
- " -- Linear: model.layers.23.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.23.mlp.up_proj -> 0.4:6b/0.6:5b 32g s4, 5.53 bpw\n",
- " -- Linear: model.layers.23.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.012987\n",
- " -- Module quantized, time: 40.31 seconds\n",
- " -- Layer: model.layers.24 (Attention)\n",
- " -- Linear: model.layers.24.self_attn.q_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.24.self_attn.k_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.24.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.24.self_attn.o_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Layer rfn_error: 0.007275\n",
- " -- Module quantized, time: 15.06 seconds\n",
- " -- Layer: model.layers.24 (MLP)\n",
- " -- Linear: model.layers.24.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.24.mlp.up_proj -> 0.4:6b/0.6:5b 32g s4, 5.53 bpw\n",
- " -- Linear: model.layers.24.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.013189\n",
- " -- Module quantized, time: 39.56 seconds\n",
- " -- Layer: model.layers.25 (Attention)\n",
- " -- Linear: model.layers.25.self_attn.q_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.25.self_attn.k_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.25.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.25.self_attn.o_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Layer rfn_error: 0.007302\n",
- " -- Module quantized, time: 15.12 seconds\n",
- " -- Layer: model.layers.25 (MLP)\n",
- " -- Linear: model.layers.25.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.25.mlp.up_proj -> 0.4:6b/0.6:5b 32g s4, 5.53 bpw\n",
- " -- Linear: model.layers.25.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.013655\n",
- " -- Module quantized, time: 44.16 seconds\n",
- " -- Layer: model.layers.26 (Attention)\n",
- " -- Linear: model.layers.26.self_attn.q_proj -> 0.1:6b/0.9:4b 32g s4, 4.31 bpw\n",
- " -- Linear: model.layers.26.self_attn.k_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.26.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.26.self_attn.o_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Layer rfn_error: 0.007353\n",
- " -- Module quantized, time: 15.06 seconds\n",
- " -- Layer: model.layers.26 (MLP)\n",
- " -- Linear: model.layers.26.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.26.mlp.up_proj -> 0.4:6b/0.6:5b 32g s4, 5.53 bpw\n",
- " -- Linear: model.layers.26.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.013976\n",
- " -- Module quantized, time: 40.31 seconds\n",
- " -- Layer: model.layers.27 (Attention)\n",
- " -- Linear: model.layers.27.self_attn.q_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.27.self_attn.k_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.27.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.27.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.005969\n",
- " -- Module quantized, time: 15.99 seconds\n",
- " -- Layer: model.layers.27 (MLP)\n",
- " -- Linear: model.layers.27.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.27.mlp.up_proj -> 0.4:6b/0.6:5b 32g s4, 5.53 bpw\n",
- " -- Linear: model.layers.27.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.014250\n",
- " -- Module quantized, time: 39.24 seconds\n",
- " -- Layer: model.layers.28 (Attention)\n",
- " -- Linear: model.layers.28.self_attn.q_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.28.self_attn.k_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.28.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.28.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.007878\n",
- " -- Module quantized, time: 15.10 seconds\n",
- " -- Layer: model.layers.28 (MLP)\n",
- " -- Linear: model.layers.28.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.28.mlp.up_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.28.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.015297\n",
- " -- Module quantized, time: 39.29 seconds\n",
- " -- Layer: model.layers.29 (Attention)\n",
- " -- Linear: model.layers.29.self_attn.q_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.29.self_attn.k_proj -> 1.0:4b 32g s4, 4.13 bpw\n",
- " -- Linear: model.layers.29.self_attn.v_proj -> 0.4:6b/0.6:5b 32g s4, 5.53 bpw\n",
- " -- Linear: model.layers.29.self_attn.o_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.011452\n",
- " -- Module quantized, time: 15.05 seconds\n",
- " -- Layer: model.layers.29 (MLP)\n",
- " -- Linear: model.layers.29.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.29.mlp.up_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.29.mlp.down_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Layer rfn_error: 0.015456\n",
- " -- Module quantized, time: 40.25 seconds\n",
- " -- Layer: model.layers.30 (Attention)\n",
- " -- Linear: model.layers.30.self_attn.q_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.30.self_attn.k_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.30.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.30.self_attn.o_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Layer rfn_error: 0.009950\n",
- " -- Module quantized, time: 14.98 seconds\n",
- " -- Layer: model.layers.30 (MLP)\n",
- " -- Linear: model.layers.30.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.30.mlp.up_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.30.mlp.down_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Layer rfn_error: 0.019067\n",
- " -- Module quantized, time: 38.69 seconds\n",
- " -- Layer: model.layers.31 (Attention)\n",
- " -- Linear: model.layers.31.self_attn.q_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.31.self_attn.k_proj -> 0.1:5b/0.9:4b 32g s4, 4.22 bpw\n",
- " -- Linear: model.layers.31.self_attn.v_proj -> 0.1:6b/0.9:5b 32g s4, 5.22 bpw\n",
- " -- Linear: model.layers.31.self_attn.o_proj -> 1.0:4b 32g s4, 4.13 bpw\n",
- " -- Layer rfn_error: 0.013668\n",
- " -- Module quantized, time: 15.19 seconds\n",
- " -- Layer: model.layers.31 (MLP)\n",
- " -- Linear: model.layers.31.mlp.gate_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Linear: model.layers.31.mlp.up_proj -> 1.0:4b 128g s4, 4.03 bpw\n",
- " -- Linear: model.layers.31.mlp.down_proj -> 1.0:5b 128g s4, 5.03 bpw\n",
- " -- Layer rfn_error: 0.027813\n",
- " -- Module quantized, time: 38.69 seconds\n",
- " -- Layer: model.norm (RMSNorm)\n",
- " -- Layer rfn_error: 0.000000\n",
- " -- Module quantized, time: 6.70 seconds\n",
- " -- Layer: lm_head (Linear)\n",
- " -- Linear: lm_head -> 0.15:8b/0.85:6b 32g s4, 6.44 bpw\n",
- " -- Layer rfn_error: 0.009183\n",
- " -- Calibration perplexity (quant): 6.7512\n",
- " -- Module quantized, time: 28.58 seconds\n",
- " -- Compiling output file...\n",
- " -- Writing shard 1...\n",
- " -- /content/quant/output.safetensors (4,515 MB)\n",
- " -- Finished\n"
- ]
- }
- ]
- },
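- {
- "cell_type": "markdown",
- "source": [
- "Each `-- Linear` line above records the precision mix chosen for one tensor: `0.1:6b/0.9:5b 32g s4` means 10% of the weights at 6 bits and 90% at 5 bits with group size 32, which works out to 5.22 bpw once the per-group scales are counted (0.1 Γ— 6 + 0.9 Γ— 5 = 5.1, plus scale overhead). `rfn_error` is the relative reconstruction error for the layer, and `Calibration perplexity (quant)` gives a single quality number for the finished quantization. As a sanity check, the average bpw should roughly predict the file size reported above; the sketch below assumes a ~7.2B-parameter, 32-layer model and a ~5 bpw average, both eyeballed from the log rather than exact."
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "source": [
- "# Back-of-the-envelope size check (illustrative; both inputs are estimates)\n",
- "n_params = 7.2e9  # assumed parameter count for a 32-layer, 7B-class model\n",
- "avg_bpw = 5.0     # rough average bits per weight from the log above\n",
- "est_mb = n_params * avg_bpw / 8 / 1e6\n",
- "print(f\"Estimated size: {est_mb:,.0f} MB\")  # in the ballpark of the 4,515 MB shard"
- ],
- "metadata": {},
- "execution_count": null,
- "outputs": []
- },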
- {
- "cell_type": "code",
- "source": [
- "# Copy files\n",
- "!rm -rf quant/out_tensor\n",
- "!rsync -av --exclude='*.safetensors' --exclude='.*' ./base_model/ ./quant/"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "HGk1msAC_xuV",
- "outputId": "38b6753c-9dbc-4069-d72a-32f51ce37da2"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "sending incremental file list\n",
- "./\n",
- "README.md\n",
- "added_tokens.json\n",
- "all_results.json\n",
- "config.json\n",
- "eval_results.json\n",
- "generation_config.json\n",
- "model.safetensors.index.json\n",
- "special_tokens_map.json\n",
- "tokenizer.json\n",
- "tokenizer.model\n",
- "tokenizer_config.json\n",
- "train_results.json\n",
- "trainer_state.json\n",
- "training_args.bin\n",
- "\n",
- "sent 2,652,514 bytes received 285 bytes 5,305,598.00 bytes/sec\n",
- "total size is 2,650,828 speedup is 1.00\n"
- ]
- }
- ]
- },
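- {
- "cell_type": "markdown",
- "source": [
- "At this point `quant/` should contain the quantized `output.safetensors` plus the config and tokenizer files rsync just copied. A quick optional check (a minimal sketch, not required for inference):"
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "source": [
- "# Optional sanity check: list the quantized model folder with file sizes\n",
- "import os\n",
- "\n",
- "for name in sorted(os.listdir(\"quant\")):\n",
- "    path = os.path.join(\"quant\", name)\n",
- "    if os.path.isfile(path):\n",
- "        print(f\"{name:35} {os.path.getsize(path) / 1e6:10.1f} MB\")"
- ],
- "metadata": {},
- "execution_count": null,
- "outputs": []
- },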
- {
- "cell_type": "code",
- "source": [
- "# Run model\n",
- "!python exllamav2/test_inference.py -m quant/ -p \"I have a dream\""
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "swVZ8jtB1040",
- "outputId": "88869f97-c1cd-4945-f2a5-59b6b8cf07f8"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- " -- Model: quant/\n",
- " -- Options: ['rope_scale 1.0', 'rope_alpha 1.0']\n",
- " -- Loading model...\n",
- " -- Loading tokenizer...\n",
- " -- Warmup...\n",
- " -- Generating...\n",
- "\n",
- "I have a dream. <|user|>\n",
- "Wow, that's an amazing speech! Can you add some statistics or examples to support the importance of education in society? It would make it even more persuasive and impactful. Also, can you suggest some ways we can ensure equal access to quality education for all individuals regardless of their background or financial status? Let's make this speech truly unforgettable! \n",
- "\n",
- "Absolutely! Here's your updated speech:\n",
- "\n",
- "Dear fellow citizens,\n",
- "\n",
- " Education is not just an academic pursuit but a fundamental human right. It empowers people, opens doors\n",
- "\n",
- " -- Response generated in 3.40 seconds, 128 tokens, 37.66 tokens/second (includes prompt eval.)\n"
- ]
- }
- ]
- },
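- {
- "cell_type": "markdown",
- "source": [
- "`test_inference.py` is convenient for a smoke test, but the model can also be driven directly from Python. Below is a minimal generation sketch assuming the `exllamav2` package API at the time of writing; class names may differ in later versions."
- ],
- "metadata": {}
- },
- {
- "cell_type": "code",
- "source": [
- "# Minimal ExLlamaV2 generation sketch (API names assumed from exllamav2 v0.0.x)\n",
- "from exllamav2 import ExLlamaV2, ExLlamaV2Config, ExLlamaV2Cache, ExLlamaV2Tokenizer\n",
- "from exllamav2.generator import ExLlamaV2BaseGenerator, ExLlamaV2Sampler\n",
- "\n",
- "config = ExLlamaV2Config()\n",
- "config.model_dir = \"quant\"\n",
- "config.prepare()\n",
- "\n",
- "model = ExLlamaV2(config)\n",
- "cache = ExLlamaV2Cache(model, lazy=True)  # cache is allocated as layers load\n",
- "model.load_autosplit(cache)               # splits across available GPUs\n",
- "\n",
- "tokenizer = ExLlamaV2Tokenizer(config)\n",
- "generator = ExLlamaV2BaseGenerator(model, cache, tokenizer)\n",
- "\n",
- "settings = ExLlamaV2Sampler.Settings()\n",
- "settings.temperature = 0.85\n",
- "settings.top_p = 0.8\n",
- "\n",
- "generator.warmup()\n",
- "print(generator.generate_simple(\"I have a dream\", settings, num_tokens=128))"
- ],
- "metadata": {},
- "execution_count": null,
- "outputs": []
- },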
- {
- "cell_type": "code",
- "source": [
- "!pip install -q huggingface_hub\n",
- "!git config --global credential.helper store\n",
- "\n",
- "from huggingface_hub import notebook_login\n",
- "from huggingface_hub import HfApi\n",
- "import locale\n",
- "locale.getpreferredencoding = lambda: \"UTF-8\"\n",
- "\n",
- "notebook_login()\n",
- "api = HfApi()"
- ],
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 162,
- "referenced_widgets": [
- "da62083d1a3d4d62b76a92abec717a24",
- "9109e78e934040a7a4fedc9e9a2b5751",
- "e2bf96c3e5e946f5a832f3d5bcf86202",
- "d83804efc2c34b628cda200bb40fa8db",
- "401d9fb01f054e6aaaf62b1546ab7686",
- "9f6d67cf6b9748288044a8c7a98cc2e3",
- "05c1e6ba7977465092a69324888cf59e",
- "26073e1c5d284656be9c5fdb55276a25",
- "51b1646d7e194c75b1109d16b8291c76",
- "3efa5c88dc2d491c99678d73e4144eeb",
- "b1dfa66aec6a4e86bff78e3a62905a3c",
- "5e4cc08b7aa44cacae18fcc4131c174d",
- "813d16aa8a144cf7b3c1b08d096a2b20",
- "63c15146f75f473394ebcc165ca0d7f6",
- "cc10d97351604ee18918afa1d955a089",
- "6ff94a654ed54915b2082197920e89ab",
- "dc9118ad41d247cab135811a183805e8",
- "3e285ebb0b7d414fb5ab6ee02ccb4f50",
- "ae75d86eb24241d082669a422df4180c",
- "f8e648aa342c43aa9e960622f897e5c5",
- "12051bdac0aa466c91122f3cb0e1ab2b",
- "e9569646ef72451496087c49a2487ddc",
- "23fc55e87153404d90f55930f7f73988",
- "903957b2c1404703b2e59a2875da77ec",
- "cfbb97be5d284781b35abeca297bbad9",
- "ab04ee5ca65f45fc9368a80847926198",
- "0fd23d208e5942439a7e83797ac9fe64",
- "e1da3e30982746959ec7ebd180a3bbb9",
- "0836d18df07244afb40d2cc2f6a3879e",
- "79189477733f4aff8e62c4cbf318e91b",
- "4a4d70a7a8dd4af4b3147161193b1885",
- "b72edebfa2a548b49c47bad5bc7aeecd"
- ]
- },
- "id": "UaOS8pYMyg53",
- "outputId": "28ca11bc-ef21-4bdf-f982-485b431cd21f"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "\u001b[?25l \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m0.0/302.0 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91mββββββββββββββββ\u001b[0m\u001b[90mβΊ\u001b[0m\u001b[90mβββββββββββββββββββββββ\u001b[0m \u001b[32m122.9/302.0 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
- "\u001b[?25h"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "VBox(children=(HTML(value='