From cf5cd669c306c1cec92c783052f62c99a722d71a Mon Sep 17 00:00:00 2001
From: Niklas Bretz <niklas.bretz@student.reutlingen-university.de>
Date: Fri, 31 Jan 2025 12:13:25 +0100
Subject: [PATCH] =?UTF-8?q?Unn=C3=B6tige=20Funktion=20entfernt?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 benchmark/benchmark.ipynb | 157 ++++++++++++++++++--------------------
 1 file changed, 73 insertions(+), 84 deletions(-)

diff --git a/benchmark/benchmark.ipynb b/benchmark/benchmark.ipynb
index ca3e163..e2a29cc 100644
--- a/benchmark/benchmark.ipynb
+++ b/benchmark/benchmark.ipynb
@@ -3,8 +3,8 @@
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2025-01-31T11:11:12.365214Z",
-     "start_time": "2025-01-31T11:11:12.317218Z"
+     "end_time": "2025-01-31T11:12:14.216466Z",
+     "start_time": "2025-01-31T11:12:14.202447Z"
     }
    },
    "cell_type": "code",
@@ -30,26 +30,26 @@
    ],
    "id": "9f7c6187bfc81ba6",
    "outputs": [],
-   "execution_count": 11
+   "execution_count": 1
   },
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2025-01-31T11:11:14.107618Z",
-     "start_time": "2025-01-31T11:11:14.098522Z"
+     "end_time": "2025-01-31T11:12:18.467020Z",
+     "start_time": "2025-01-31T11:12:18.448483Z"
     }
    },
    "cell_type": "code",
    "source": "trait_categories = [\"EXT\", \"EST\", \"AGR\", \"CSN\", \"OPN\"]",
    "id": "3e451794c0a28fec",
    "outputs": [],
-   "execution_count": 12
+   "execution_count": 2
   },
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2025-01-31T11:11:16.998450Z",
-     "start_time": "2025-01-31T11:11:15.329200Z"
+     "end_time": "2025-01-31T11:12:29.123538Z",
+     "start_time": "2025-01-31T11:12:19.224229Z"
     }
    },
    "cell_type": "code",
@@ -92,33 +92,31 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.\n",
-      "C:\\Users\\qwert\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\accelerate\\utils\\modeling.py:1390: UserWarning: Current model requires 1856 bytes of buffer for offloaded layers, which seems does not fit any GPU's remaining memory. If you are experiencing a OOM later, please consider using offload_buffers=True.\n",
-      "  warnings.warn(\n"
+      "The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.\n"
      ]
     },
     {
-     "ename": "ValueError",
-     "evalue": "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. ",
-     "output_type": "error",
-     "traceback": [
-      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
-      "\u001B[1;31mValueError\u001B[0m                                Traceback (most recent call last)",
-      "Cell \u001B[1;32mIn[13], line 11\u001B[0m\n\u001B[0;32m      9\u001B[0m tokenizer \u001B[38;5;241m=\u001B[39m AutoTokenizer\u001B[38;5;241m.\u001B[39mfrom_pretrained(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mmeta-llama/Llama-3.2-3B-Instruct\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m     10\u001B[0m tokenizer\u001B[38;5;241m.\u001B[39mpad_token \u001B[38;5;241m=\u001B[39m tokenizer\u001B[38;5;241m.\u001B[39meos_token\n\u001B[1;32m---> 11\u001B[0m model \u001B[38;5;241m=\u001B[39m \u001B[43mAutoModelForCausalLM\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfrom_pretrained\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m     12\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mmeta-llama/Llama-3.2-3B-Instruct\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m     13\u001B[0m \u001B[43m    \u001B[49m\u001B[43mload_in_8bit\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mTrue\u001B[39;49;00m\u001B[43m,\u001B[49m\u001B[43m  \u001B[49m\u001B[38;5;66;43;03m# This loads the model in 8-bit precision\u001B[39;49;00m\n\u001B[0;32m     14\u001B[0m \u001B[43m    \u001B[49m\u001B[43mdevice_map\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mauto\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m  \u001B[49m\u001B[38;5;66;43;03m# Automatically assigns the model to available devices\u001B[39;49;00m\n\u001B[0;32m     15\u001B[0m \u001B[43m)\u001B[49m\n\u001B[0;32m     17\u001B[0m custom_pipeline \u001B[38;5;241m=\u001B[39m pipeline(\n\u001B[0;32m     18\u001B[0m     \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mtext-generation\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[0;32m     19\u001B[0m     model\u001B[38;5;241m=\u001B[39mmodel,\n\u001B[0;32m     20\u001B[0m     tokenizer\u001B[38;5;241m=\u001B[39mtokenizer,\n\u001B[0;32m     21\u001B[0m )\n\u001B[0;32m     22\u001B[0m terminators \u001B[38;5;241m=\u001B[39m [\n\u001B[0;32m     23\u001B[0m     custom_pipeline\u001B[38;5;241m.\u001B[39mtokenizer\u001B[38;5;241m.\u001B[39meos_token_id,\n\u001B[0;32m     24\u001B[0m     custom_pipeline\u001B[38;5;241m.\u001B[39mtokenizer\u001B[38;5;241m.\u001B[39mconvert_tokens_to_ids(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m\"\u001B[39m),\n\u001B[0;32m     25\u001B[0m ]\n",
-      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:564\u001B[0m, in \u001B[0;36m_BaseAutoModelClass.from_pretrained\u001B[1;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001B[0m\n\u001B[0;32m    562\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28mtype\u001B[39m(config) \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mcls\u001B[39m\u001B[38;5;241m.\u001B[39m_model_mapping\u001B[38;5;241m.\u001B[39mkeys():\n\u001B[0;32m    563\u001B[0m     model_class \u001B[38;5;241m=\u001B[39m _get_model_class(config, \u001B[38;5;28mcls\u001B[39m\u001B[38;5;241m.\u001B[39m_model_mapping)\n\u001B[1;32m--> 564\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m model_class\u001B[38;5;241m.\u001B[39mfrom_pretrained(\n\u001B[0;32m    565\u001B[0m         pretrained_model_name_or_path, \u001B[38;5;241m*\u001B[39mmodel_args, config\u001B[38;5;241m=\u001B[39mconfig, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mhub_kwargs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs\n\u001B[0;32m    566\u001B[0m     )\n\u001B[0;32m    567\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\n\u001B[0;32m    568\u001B[0m     \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mUnrecognized configuration class \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mconfig\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__class__\u001B[39m\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m for this kind of AutoModel: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mcls\u001B[39m\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m.\u001B[39m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    569\u001B[0m     \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mModel type should be one of \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m, \u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;241m.\u001B[39mjoin(c\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m\u001B[38;5;250m \u001B[39m\u001B[38;5;28;01mfor\u001B[39;00m\u001B[38;5;250m \u001B[39mc\u001B[38;5;250m \u001B[39m\u001B[38;5;129;01min\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;28mcls\u001B[39m\u001B[38;5;241m.\u001B[39m_model_mapping\u001B[38;5;241m.\u001B[39mkeys())\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m.\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    570\u001B[0m )\n",
-      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\modeling_utils.py:3963\u001B[0m, in \u001B[0;36mPreTrainedModel.from_pretrained\u001B[1;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001B[0m\n\u001B[0;32m   3960\u001B[0m     device_map \u001B[38;5;241m=\u001B[39m infer_auto_device_map(model, dtype\u001B[38;5;241m=\u001B[39mtarget_dtype, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mdevice_map_kwargs)\n\u001B[0;32m   3962\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m hf_quantizer \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m-> 3963\u001B[0m         \u001B[43mhf_quantizer\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mvalidate_environment\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdevice_map\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mdevice_map\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m   3965\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m device_map \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m   3966\u001B[0m     model\u001B[38;5;241m.\u001B[39mtie_weights()\n",
-      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\quantizers\\quantizer_bnb_8bit.py:101\u001B[0m, in \u001B[0;36mBnb8BitHfQuantizer.validate_environment\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m     99\u001B[0m         \u001B[38;5;28;01mpass\u001B[39;00m\n\u001B[0;32m    100\u001B[0m     \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcpu\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01min\u001B[39;00m device_map_without_lm_head\u001B[38;5;241m.\u001B[39mvalues() \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mdisk\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01min\u001B[39;00m device_map_without_lm_head\u001B[38;5;241m.\u001B[39mvalues():\n\u001B[1;32m--> 101\u001B[0m         \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\n\u001B[0;32m    102\u001B[0m             \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mSome modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    103\u001B[0m             \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mquantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    104\u001B[0m             \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124min 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    105\u001B[0m             \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m`from_pretrained`. Check \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    106\u001B[0m             \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mhttps://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    107\u001B[0m             \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mfor more details. \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    108\u001B[0m         )\n\u001B[0;32m    110\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m version\u001B[38;5;241m.\u001B[39mparse(importlib\u001B[38;5;241m.\u001B[39mmetadata\u001B[38;5;241m.\u001B[39mversion(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mbitsandbytes\u001B[39m\u001B[38;5;124m\"\u001B[39m)) \u001B[38;5;241m<\u001B[39m version\u001B[38;5;241m.\u001B[39mparse(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m0.37.2\u001B[39m\u001B[38;5;124m\"\u001B[39m):\n\u001B[0;32m    111\u001B[0m     \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\n\u001B[0;32m    112\u001B[0m         \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mYou have a version of `bitsandbytes` that is not compatible with 8bit inference and training\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    113\u001B[0m         \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m make sure you have the latest version of `bitsandbytes` installed\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m    114\u001B[0m     )\n",
-      "\u001B[1;31mValueError\u001B[0m: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. "
-     ]
+     "data": {
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
+      ],
+      "application/vnd.jupyter.widget-view+json": {
+       "version_major": 2,
+       "version_minor": 0,
+       "model_id": "f91c638e4399428d8f3b321cc1ae071d"
+      }
+     },
+     "metadata": {},
+     "output_type": "display_data"
     }
    ],
-   "execution_count": 13
+   "execution_count": 3
   },
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2025-01-31T10:58:27.934353Z",
-     "start_time": "2025-01-31T10:58:27.924834Z"
+     "end_time": "2025-01-31T11:12:32.705652Z",
+     "start_time": "2025-01-31T11:12:32.701140Z"
     }
    },
    "cell_type": "code",
@@ -144,8 +142,8 @@
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2025-01-31T10:58:29.489756Z",
-     "start_time": "2025-01-31T10:58:29.461648Z"
+     "end_time": "2025-01-31T11:12:33.935985Z",
+     "start_time": "2025-01-31T11:12:33.917759Z"
     }
    },
    "cell_type": "code",
@@ -207,8 +205,8 @@
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2025-01-31T11:00:58.310059Z",
-     "start_time": "2025-01-31T11:00:58.272535Z"
+     "end_time": "2025-01-31T11:12:36.111618Z",
+     "start_time": "2025-01-31T11:12:36.089989Z"
     }
    },
    "cell_type": "code",
@@ -316,39 +314,8 @@
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2025-01-31T11:01:00.823080Z",
-     "start_time": "2025-01-31T11:01:00.816079Z"
-    }
-   },
-   "cell_type": "code",
-   "source": [
-    "def eval_trait(\n",
-    "        trait,\n",
-    "        num_statements,\n",
-    "        mode,\n",
-    "        samples,\n",
-    "        batches,\n",
-    "        optimized_prompt\n",
-    "):\n",
-    "    trait_eval_results = []\n",
-    "\n",
-    "    for i in range(0, batches):\n",
-    "\n",
-    "        prediction_true_arr = eval_llm(samples, trait, num_statements, mode)\n",
-    "        trait_eval_results = [samples, trait, num_statements, prediction_true_arr]\n",
-    "        print(\"Batch: \" + (i+1).__str__() + \" Prompts: \" + samples.__str__())\n",
-    "\n",
-    "    return trait_eval_results"
-   ],
-   "id": "5133362b9d7d2863",
-   "outputs": [],
-   "execution_count": 7
-  },
-  {
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2025-01-31T11:01:02.328168Z",
-     "start_time": "2025-01-31T11:01:02.310961Z"
+     "end_time": "2025-01-31T11:13:09.666818Z",
+     "start_time": "2025-01-31T11:13:09.648801Z"
     }
    },
    "cell_type": "code",
@@ -371,8 +338,8 @@
   {
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2025-01-31T11:04:35.503990Z",
-     "start_time": "2025-01-31T11:01:11.569206Z"
+     "end_time": "2025-01-31T11:13:13.939212Z",
+     "start_time": "2025-01-31T11:13:10.754886Z"
     }
    },
    "cell_type": "code",
@@ -440,31 +407,53 @@
       "C:\\Users\\qwert\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\generation\\configuration_utils.py:601: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.6` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
       "  warnings.warn(\n",
       "C:\\Users\\qwert\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\generation\\configuration_utils.py:606: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
-      "  warnings.warn(\n"
+      "  warnings.warn(\n",
+      "C:\\Users\\qwert\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:655: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\\actions-runner\\_work\\pytorch\\pytorch\\builder\\windows\\pytorch\\aten\\src\\ATen\\native\\transformers\\cuda\\sdp_utils.cpp:555.)\n",
+      "  attn_output = torch.nn.functional.scaled_dot_product_attention(\n",
+      "Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)\n"
      ]
     },
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LLM:  1 Mensch: 5 | Dfs:EXT Entry:21\n",
-      "Based on the provided scale, predict the person's response to the new statement considering their previous answers.\n",
-      "----------------\n",
-      "LLM:  1 Mensch: 5 | Dfs:EST Entry:16\n",
-      "Based on the provided scale, predict the person's response to the new statement considering their previous answers.\n",
-      "----------------\n",
-      "LLM:  1 Mensch: 5 | Dfs:AGR Entry:58\n",
-      "Based on the provided scale, predict the person's response to the new statement considering their previous answers.\n",
-      "----------------\n",
-      "LLM:  1 Mensch: 5 | Dfs:OPN Entry:13\n",
-      "Based on the provided scale, predict the person's response to the new statement considering their previous answers.\n",
-      "----------------\n",
-      "Failed iterations: 0\n",
-      "Accuracy: 34.02298850574713\n"
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[1;31mKeyboardInterrupt\u001B[0m                         Traceback (most recent call last)",
+      "Cell \u001B[1;32mIn[9], line 8\u001B[0m\n\u001B[0;32m      4\u001B[0m importlib\u001B[38;5;241m.\u001B[39mreload(OtherUtils)\n\u001B[0;32m      6\u001B[0m prompt \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mBased on the provided scale, predict the person\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124ms response to the new statement considering their previous answers.\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m----> 8\u001B[0m individual_result \u001B[38;5;241m=\u001B[39m \u001B[43meval_random_trait\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m      9\u001B[0m \u001B[43m    \u001B[49m\u001B[43mnum_statements\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43m \u001B[49m\u001B[38;5;241;43m5\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m     10\u001B[0m \u001B[43m    \u001B[49m\u001B[43mmode\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43m \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mnumbers\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m     11\u001B[0m \u001B[43m    \u001B[49m\u001B[43moptimized_prompt\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43m \u001B[49m\u001B[43mprompt\u001B[49m\n\u001B[0;32m     12\u001B[0m \u001B[43m)\u001B[49m\n\u001B[0;32m     13\u001B[0m acc \u001B[38;5;241m=\u001B[39m Plots\u001B[38;5;241m.\u001B[39mget_accuracy(individual_result)\n\u001B[0;32m     14\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mAccuracy: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00macc\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m)\n",
+      "Cell \u001B[1;32mIn[8], line 7\u001B[0m, in \u001B[0;36meval_random_trait\u001B[1;34m(num_statements, mode, optimized_prompt)\u001B[0m\n\u001B[0;32m      1\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21meval_random_trait\u001B[39m(\n\u001B[0;32m      2\u001B[0m         num_statements,\n\u001B[0;32m      3\u001B[0m         mode,\n\u001B[0;32m      4\u001B[0m         optimized_prompt\n\u001B[0;32m      5\u001B[0m ):\n\u001B[0;32m      6\u001B[0m     eval_results \u001B[38;5;241m=\u001B[39m []\n\u001B[1;32m----> 7\u001B[0m     prediction_true_arr \u001B[38;5;241m=\u001B[39m \u001B[43meval_llm\u001B[49m\u001B[43m(\u001B[49m\u001B[43mnum_statements\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmode\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43moptimized_prompt\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m      8\u001B[0m     eval_results \u001B[38;5;241m=\u001B[39m [num_statements, prediction_true_arr]\n\u001B[0;32m     10\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m eval_results\n",
+      "Cell \u001B[1;32mIn[6], line 56\u001B[0m, in \u001B[0;36meval_llm\u001B[1;34m(num_statements, mode, optimized_prompt)\u001B[0m\n\u001B[0;32m     53\u001B[0m current_question \u001B[38;5;241m=\u001B[39m personality_traits[field_of_interest_formatted]\n\u001B[0;32m     54\u001B[0m human_answer \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mint\u001B[39m(entry[\u001B[38;5;241m1\u001B[39m][field_of_interest_formatted])\n\u001B[1;32m---> 56\u001B[0m llm_ans \u001B[38;5;241m=\u001B[39m \u001B[43mprompt_llm\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m     57\u001B[0m \u001B[43m    \u001B[49m\u001B[43mpersonality_statements\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m     58\u001B[0m \u001B[43m    \u001B[49m\u001B[43mcurrent_question\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m     59\u001B[0m \u001B[43m    \u001B[49m\u001B[43mnum_statements\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m     60\u001B[0m \u001B[43m    \u001B[49m\u001B[43mprev_questions\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m     61\u001B[0m \u001B[43m    \u001B[49m\u001B[43mprev_answers\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m     62\u001B[0m \u001B[43m    \u001B[49m\u001B[43mmode\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m     63\u001B[0m \u001B[43m    \u001B[49m\u001B[43moptimized_prompt\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m     64\u001B[0m \u001B[43m    \u001B[49m\u001B[43mprint_prompt\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;129;43;01mnot\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mfirst_printed\u001B[49m\n\u001B[0;32m     65\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m     67\u001B[0m first_printed \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m\n\u001B[0;32m     69\u001B[0m \u001B[38;5;66;03m#print(f\"LLM: {llm_ans} Mensch: {human_answer}\")\u001B[39;00m\n",
+      "Cell \u001B[1;32mIn[5], line 40\u001B[0m, in \u001B[0;36mprompt_llm\u001B[1;34m(personality_statement, current_question, num_statements, prev_questions, prev_answers, mode, optimized_prompt, print_prompt)\u001B[0m\n\u001B[0;32m     36\u001B[0m prompt \u001B[38;5;241m=\u001B[39m custom_chat_template(prompt)\n\u001B[0;32m     38\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m print_prompt: \u001B[38;5;28mprint\u001B[39m(prompt)\n\u001B[1;32m---> 40\u001B[0m outputs \u001B[38;5;241m=\u001B[39m \u001B[43mcustom_pipeline\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m     41\u001B[0m \u001B[43m    \u001B[49m\u001B[43mprompt\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m     42\u001B[0m \u001B[43m    \u001B[49m\u001B[43mmax_new_tokens\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;241;43m20\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m     43\u001B[0m \u001B[43m    \u001B[49m\u001B[43meos_token_id\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcustom_pipeline\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtokenizer\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43meos_token_id\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m     44\u001B[0m \u001B[43m    \u001B[49m\u001B[43mdo_sample\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mFalse\u001B[39;49;00m\n\u001B[0;32m     45\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m     47\u001B[0m response \u001B[38;5;241m=\u001B[39m outputs[\u001B[38;5;241m0\u001B[39m][\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mgenerated_text\u001B[39m\u001B[38;5;124m\"\u001B[39m][\u001B[38;5;28mlen\u001B[39m(prompt):]\n\u001B[0;32m     49\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m response\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\pipelines\\text_generation.py:272\u001B[0m, in \u001B[0;36mTextGenerationPipeline.__call__\u001B[1;34m(self, text_inputs, **kwargs)\u001B[0m\n\u001B[0;32m    270\u001B[0m         \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28msuper\u001B[39m()\u001B[38;5;241m.\u001B[39m\u001B[38;5;21m__call__\u001B[39m(chats, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m    271\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 272\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28msuper\u001B[39m()\u001B[38;5;241m.\u001B[39m\u001B[38;5;21m__call__\u001B[39m(text_inputs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\pipelines\\base.py:1268\u001B[0m, in \u001B[0;36mPipeline.__call__\u001B[1;34m(self, inputs, num_workers, batch_size, *args, **kwargs)\u001B[0m\n\u001B[0;32m   1260\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mnext\u001B[39m(\n\u001B[0;32m   1261\u001B[0m         \u001B[38;5;28miter\u001B[39m(\n\u001B[0;32m   1262\u001B[0m             \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mget_iterator(\n\u001B[1;32m   (...)\u001B[0m\n\u001B[0;32m   1265\u001B[0m         )\n\u001B[0;32m   1266\u001B[0m     )\n\u001B[0;32m   1267\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m-> 1268\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrun_single\u001B[49m\u001B[43m(\u001B[49m\u001B[43minputs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mpreprocess_params\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mforward_params\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mpostprocess_params\u001B[49m\u001B[43m)\u001B[49m\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\pipelines\\base.py:1275\u001B[0m, in \u001B[0;36mPipeline.run_single\u001B[1;34m(self, inputs, preprocess_params, forward_params, postprocess_params)\u001B[0m\n\u001B[0;32m   1273\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mrun_single\u001B[39m(\u001B[38;5;28mself\u001B[39m, inputs, preprocess_params, forward_params, postprocess_params):\n\u001B[0;32m   1274\u001B[0m     model_inputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mpreprocess(inputs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mpreprocess_params)\n\u001B[1;32m-> 1275\u001B[0m     model_outputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mforward(model_inputs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mforward_params)\n\u001B[0;32m   1276\u001B[0m     outputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mpostprocess(model_outputs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mpostprocess_params)\n\u001B[0;32m   1277\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m outputs\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\pipelines\\base.py:1175\u001B[0m, in \u001B[0;36mPipeline.forward\u001B[1;34m(self, model_inputs, **forward_params)\u001B[0m\n\u001B[0;32m   1173\u001B[0m     \u001B[38;5;28;01mwith\u001B[39;00m inference_context():\n\u001B[0;32m   1174\u001B[0m         model_inputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_ensure_tensor_on_device(model_inputs, device\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdevice)\n\u001B[1;32m-> 1175\u001B[0m         model_outputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward(model_inputs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mforward_params)\n\u001B[0;32m   1176\u001B[0m         model_outputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_ensure_tensor_on_device(model_outputs, device\u001B[38;5;241m=\u001B[39mtorch\u001B[38;5;241m.\u001B[39mdevice(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcpu\u001B[39m\u001B[38;5;124m\"\u001B[39m))\n\u001B[0;32m   1177\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\pipelines\\text_generation.py:370\u001B[0m, in \u001B[0;36mTextGenerationPipeline._forward\u001B[1;34m(self, model_inputs, **generate_kwargs)\u001B[0m\n\u001B[0;32m    367\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mgeneration_config\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;129;01min\u001B[39;00m generate_kwargs:\n\u001B[0;32m    368\u001B[0m     generate_kwargs[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mgeneration_config\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mgeneration_config\n\u001B[1;32m--> 370\u001B[0m generated_sequence \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmodel\u001B[38;5;241m.\u001B[39mgenerate(input_ids\u001B[38;5;241m=\u001B[39minput_ids, attention_mask\u001B[38;5;241m=\u001B[39mattention_mask, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mgenerate_kwargs)\n\u001B[0;32m    371\u001B[0m out_b \u001B[38;5;241m=\u001B[39m generated_sequence\u001B[38;5;241m.\u001B[39mshape[\u001B[38;5;241m0\u001B[39m]\n\u001B[0;32m    372\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mframework \u001B[38;5;241m==\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mpt\u001B[39m\u001B[38;5;124m\"\u001B[39m:\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\utils\\_contextlib.py:116\u001B[0m, in \u001B[0;36mcontext_decorator.<locals>.decorate_context\u001B[1;34m(*args, **kwargs)\u001B[0m\n\u001B[0;32m    113\u001B[0m \u001B[38;5;129m@functools\u001B[39m\u001B[38;5;241m.\u001B[39mwraps(func)\n\u001B[0;32m    114\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mdecorate_context\u001B[39m(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs):\n\u001B[0;32m    115\u001B[0m     \u001B[38;5;28;01mwith\u001B[39;00m ctx_factory():\n\u001B[1;32m--> 116\u001B[0m         \u001B[38;5;28;01mreturn\u001B[39;00m func(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\generation\\utils.py:2047\u001B[0m, in \u001B[0;36mGenerationMixin.generate\u001B[1;34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)\u001B[0m\n\u001B[0;32m   2039\u001B[0m     input_ids, model_kwargs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_expand_inputs_for_generation(\n\u001B[0;32m   2040\u001B[0m         input_ids\u001B[38;5;241m=\u001B[39minput_ids,\n\u001B[0;32m   2041\u001B[0m         expand_size\u001B[38;5;241m=\u001B[39mgeneration_config\u001B[38;5;241m.\u001B[39mnum_return_sequences,\n\u001B[0;32m   2042\u001B[0m         is_encoder_decoder\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mconfig\u001B[38;5;241m.\u001B[39mis_encoder_decoder,\n\u001B[0;32m   2043\u001B[0m         \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mmodel_kwargs,\n\u001B[0;32m   2044\u001B[0m     )\n\u001B[0;32m   2046\u001B[0m     \u001B[38;5;66;03m# 12. run sample (it degenerates to greedy search when `generation_config.do_sample=False`)\u001B[39;00m\n\u001B[1;32m-> 2047\u001B[0m     result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_sample(\n\u001B[0;32m   2048\u001B[0m         input_ids,\n\u001B[0;32m   2049\u001B[0m         logits_processor\u001B[38;5;241m=\u001B[39mprepared_logits_processor,\n\u001B[0;32m   2050\u001B[0m         stopping_criteria\u001B[38;5;241m=\u001B[39mprepared_stopping_criteria,\n\u001B[0;32m   2051\u001B[0m         generation_config\u001B[38;5;241m=\u001B[39mgeneration_config,\n\u001B[0;32m   2052\u001B[0m         synced_gpus\u001B[38;5;241m=\u001B[39msynced_gpus,\n\u001B[0;32m   2053\u001B[0m         streamer\u001B[38;5;241m=\u001B[39mstreamer,\n\u001B[0;32m   2054\u001B[0m         \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mmodel_kwargs,\n\u001B[0;32m   2055\u001B[0m     )\n\u001B[0;32m   2057\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m generation_mode \u001B[38;5;129;01min\u001B[39;00m (GenerationMode\u001B[38;5;241m.\u001B[39mBEAM_SAMPLE, GenerationMode\u001B[38;5;241m.\u001B[39mBEAM_SEARCH):\n\u001B[0;32m   2058\u001B[0m     \u001B[38;5;66;03m# 11. prepare beam search scorer\u001B[39;00m\n\u001B[0;32m   2059\u001B[0m     beam_scorer \u001B[38;5;241m=\u001B[39m BeamSearchScorer(\n\u001B[0;32m   2060\u001B[0m         batch_size\u001B[38;5;241m=\u001B[39mbatch_size,\n\u001B[0;32m   2061\u001B[0m         num_beams\u001B[38;5;241m=\u001B[39mgeneration_config\u001B[38;5;241m.\u001B[39mnum_beams,\n\u001B[1;32m   (...)\u001B[0m\n\u001B[0;32m   2066\u001B[0m         max_length\u001B[38;5;241m=\u001B[39mgeneration_config\u001B[38;5;241m.\u001B[39mmax_length,\n\u001B[0;32m   2067\u001B[0m     )\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\generation\\utils.py:3007\u001B[0m, in \u001B[0;36mGenerationMixin._sample\u001B[1;34m(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)\u001B[0m\n\u001B[0;32m   3004\u001B[0m model_inputs\u001B[38;5;241m.\u001B[39mupdate({\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124moutput_hidden_states\u001B[39m\u001B[38;5;124m\"\u001B[39m: output_hidden_states} \u001B[38;5;28;01mif\u001B[39;00m output_hidden_states \u001B[38;5;28;01melse\u001B[39;00m {})\n\u001B[0;32m   3006\u001B[0m \u001B[38;5;66;03m# forward pass to get next token\u001B[39;00m\n\u001B[1;32m-> 3007\u001B[0m outputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m(\u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mmodel_inputs, return_dict\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m)\n\u001B[0;32m   3009\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m synced_gpus \u001B[38;5;129;01mand\u001B[39;00m this_peer_finished:\n\u001B[0;32m   3010\u001B[0m     \u001B[38;5;28;01mcontinue\u001B[39;00m  \u001B[38;5;66;03m# don't waste resources running the code we don't need\u001B[39;00m\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1553\u001B[0m, in \u001B[0;36mModule._wrapped_call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m   1551\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_compiled_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)  \u001B[38;5;66;03m# type: ignore[misc]\u001B[39;00m\n\u001B[0;32m   1552\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m-> 1553\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1562\u001B[0m, in \u001B[0;36mModule._call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m   1557\u001B[0m \u001B[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001B[39;00m\n\u001B[0;32m   1558\u001B[0m \u001B[38;5;66;03m# this function, and just call forward.\u001B[39;00m\n\u001B[0;32m   1559\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_pre_hooks\n\u001B[0;32m   1560\u001B[0m         \u001B[38;5;129;01mor\u001B[39;00m _global_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_backward_hooks\n\u001B[0;32m   1561\u001B[0m         \u001B[38;5;129;01mor\u001B[39;00m _global_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_forward_pre_hooks):\n\u001B[1;32m-> 1562\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m forward_call(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m   1564\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m   1565\u001B[0m     result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\accelerate\\hooks.py:170\u001B[0m, in \u001B[0;36madd_hook_to_module.<locals>.new_forward\u001B[1;34m(module, *args, **kwargs)\u001B[0m\n\u001B[0;32m    168\u001B[0m         output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m    169\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 170\u001B[0m     output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m    171\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m module\u001B[38;5;241m.\u001B[39m_hf_hook\u001B[38;5;241m.\u001B[39mpost_forward(module, output)\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:1189\u001B[0m, in \u001B[0;36mLlamaForCausalLM.forward\u001B[1;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position, num_logits_to_keep)\u001B[0m\n\u001B[0;32m   1186\u001B[0m return_dict \u001B[38;5;241m=\u001B[39m return_dict \u001B[38;5;28;01mif\u001B[39;00m return_dict \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;28;01melse\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mconfig\u001B[38;5;241m.\u001B[39muse_return_dict\n\u001B[0;32m   1188\u001B[0m \u001B[38;5;66;03m# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)\u001B[39;00m\n\u001B[1;32m-> 1189\u001B[0m outputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmodel\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m   1190\u001B[0m \u001B[43m    \u001B[49m\u001B[43minput_ids\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minput_ids\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1191\u001B[0m \u001B[43m    \u001B[49m\u001B[43mattention_mask\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mattention_mask\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1192\u001B[0m \u001B[43m    \u001B[49m\u001B[43mposition_ids\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mposition_ids\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1193\u001B[0m \u001B[43m    \u001B[49m\u001B[43mpast_key_values\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mpast_key_values\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1194\u001B[0m \u001B[43m    \u001B[49m\u001B[43minputs_embeds\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minputs_embeds\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1195\u001B[0m \u001B[43m    \u001B[49m\u001B[43muse_cache\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43muse_cache\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1196\u001B[0m \u001B[43m    \u001B[49m\u001B[43moutput_attentions\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43moutput_attentions\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1197\u001B[0m \u001B[43m    \u001B[49m\u001B[43moutput_hidden_states\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43moutput_hidden_states\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1198\u001B[0m \u001B[43m    \u001B[49m\u001B[43mreturn_dict\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mreturn_dict\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1199\u001B[0m \u001B[43m    \u001B[49m\u001B[43mcache_position\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcache_position\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1200\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m   1202\u001B[0m hidden_states \u001B[38;5;241m=\u001B[39m outputs[\u001B[38;5;241m0\u001B[39m]\n\u001B[0;32m   1203\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mconfig\u001B[38;5;241m.\u001B[39mpretraining_tp \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m1\u001B[39m:\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1553\u001B[0m, in \u001B[0;36mModule._wrapped_call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m   1551\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_compiled_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)  \u001B[38;5;66;03m# type: ignore[misc]\u001B[39;00m\n\u001B[0;32m   1552\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m-> 1553\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1562\u001B[0m, in \u001B[0;36mModule._call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m   1557\u001B[0m \u001B[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001B[39;00m\n\u001B[0;32m   1558\u001B[0m \u001B[38;5;66;03m# this function, and just call forward.\u001B[39;00m\n\u001B[0;32m   1559\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_pre_hooks\n\u001B[0;32m   1560\u001B[0m         \u001B[38;5;129;01mor\u001B[39;00m _global_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_backward_hooks\n\u001B[0;32m   1561\u001B[0m         \u001B[38;5;129;01mor\u001B[39;00m _global_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_forward_pre_hooks):\n\u001B[1;32m-> 1562\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m forward_call(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m   1564\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m   1565\u001B[0m     result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\accelerate\\hooks.py:170\u001B[0m, in \u001B[0;36madd_hook_to_module.<locals>.new_forward\u001B[1;34m(module, *args, **kwargs)\u001B[0m\n\u001B[0;32m    168\u001B[0m         output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m    169\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 170\u001B[0m     output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m    171\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m module\u001B[38;5;241m.\u001B[39m_hf_hook\u001B[38;5;241m.\u001B[39mpost_forward(module, output)\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:1000\u001B[0m, in \u001B[0;36mLlamaModel.forward\u001B[1;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, cache_position)\u001B[0m\n\u001B[0;32m    988\u001B[0m     layer_outputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_gradient_checkpointing_func(\n\u001B[0;32m    989\u001B[0m         decoder_layer\u001B[38;5;241m.\u001B[39m\u001B[38;5;21m__call__\u001B[39m,\n\u001B[0;32m    990\u001B[0m         hidden_states,\n\u001B[1;32m   (...)\u001B[0m\n\u001B[0;32m    997\u001B[0m         position_embeddings,\n\u001B[0;32m    998\u001B[0m     )\n\u001B[0;32m    999\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m-> 1000\u001B[0m     layer_outputs \u001B[38;5;241m=\u001B[39m \u001B[43mdecoder_layer\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m   1001\u001B[0m \u001B[43m        \u001B[49m\u001B[43mhidden_states\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1002\u001B[0m \u001B[43m        \u001B[49m\u001B[43mattention_mask\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcausal_mask\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1003\u001B[0m \u001B[43m        \u001B[49m\u001B[43mposition_ids\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mposition_ids\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1004\u001B[0m \u001B[43m        \u001B[49m\u001B[43mpast_key_value\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mpast_key_values\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1005\u001B[0m \u001B[43m        \u001B[49m\u001B[43moutput_attentions\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43moutput_attentions\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1006\u001B[0m \u001B[43m        \u001B[49m\u001B[43muse_cache\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43muse_cache\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1007\u001B[0m \u001B[43m        \u001B[49m\u001B[43mcache_position\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcache_position\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1008\u001B[0m \u001B[43m        \u001B[49m\u001B[43mposition_embeddings\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mposition_embeddings\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   1009\u001B[0m \u001B[43m    \u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m   1011\u001B[0m hidden_states \u001B[38;5;241m=\u001B[39m layer_outputs[\u001B[38;5;241m0\u001B[39m]\n\u001B[0;32m   1013\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m use_cache:\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1553\u001B[0m, in \u001B[0;36mModule._wrapped_call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m   1551\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_compiled_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)  \u001B[38;5;66;03m# type: ignore[misc]\u001B[39;00m\n\u001B[0;32m   1552\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m-> 1553\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1562\u001B[0m, in \u001B[0;36mModule._call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m   1557\u001B[0m \u001B[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001B[39;00m\n\u001B[0;32m   1558\u001B[0m \u001B[38;5;66;03m# this function, and just call forward.\u001B[39;00m\n\u001B[0;32m   1559\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_pre_hooks\n\u001B[0;32m   1560\u001B[0m         \u001B[38;5;129;01mor\u001B[39;00m _global_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_backward_hooks\n\u001B[0;32m   1561\u001B[0m         \u001B[38;5;129;01mor\u001B[39;00m _global_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_forward_pre_hooks):\n\u001B[1;32m-> 1562\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m forward_call(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m   1564\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m   1565\u001B[0m     result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\accelerate\\hooks.py:170\u001B[0m, in \u001B[0;36madd_hook_to_module.<locals>.new_forward\u001B[1;34m(module, *args, **kwargs)\u001B[0m\n\u001B[0;32m    168\u001B[0m         output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m    169\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 170\u001B[0m     output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m    171\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m module\u001B[38;5;241m.\u001B[39m_hf_hook\u001B[38;5;241m.\u001B[39mpost_forward(module, output)\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:729\u001B[0m, in \u001B[0;36mLlamaDecoderLayer.forward\u001B[1;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position, position_embeddings, **kwargs)\u001B[0m\n\u001B[0;32m    726\u001B[0m hidden_states \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39minput_layernorm(hidden_states)\n\u001B[0;32m    728\u001B[0m \u001B[38;5;66;03m# Self Attention\u001B[39;00m\n\u001B[1;32m--> 729\u001B[0m hidden_states, self_attn_weights, present_key_value \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mself_attn(\n\u001B[0;32m    730\u001B[0m     hidden_states\u001B[38;5;241m=\u001B[39mhidden_states,\n\u001B[0;32m    731\u001B[0m     attention_mask\u001B[38;5;241m=\u001B[39mattention_mask,\n\u001B[0;32m    732\u001B[0m     position_ids\u001B[38;5;241m=\u001B[39mposition_ids,\n\u001B[0;32m    733\u001B[0m     past_key_value\u001B[38;5;241m=\u001B[39mpast_key_value,\n\u001B[0;32m    734\u001B[0m     output_attentions\u001B[38;5;241m=\u001B[39moutput_attentions,\n\u001B[0;32m    735\u001B[0m     use_cache\u001B[38;5;241m=\u001B[39muse_cache,\n\u001B[0;32m    736\u001B[0m     cache_position\u001B[38;5;241m=\u001B[39mcache_position,\n\u001B[0;32m    737\u001B[0m     position_embeddings\u001B[38;5;241m=\u001B[39mposition_embeddings,\n\u001B[0;32m    738\u001B[0m     \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs,\n\u001B[0;32m    739\u001B[0m )\n\u001B[0;32m    740\u001B[0m hidden_states \u001B[38;5;241m=\u001B[39m residual \u001B[38;5;241m+\u001B[39m hidden_states\n\u001B[0;32m    742\u001B[0m \u001B[38;5;66;03m# Fully Connected\u001B[39;00m\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1553\u001B[0m, in \u001B[0;36mModule._wrapped_call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m   1551\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_compiled_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)  \u001B[38;5;66;03m# type: ignore[misc]\u001B[39;00m\n\u001B[0;32m   1552\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m-> 1553\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1562\u001B[0m, in \u001B[0;36mModule._call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m   1557\u001B[0m \u001B[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001B[39;00m\n\u001B[0;32m   1558\u001B[0m \u001B[38;5;66;03m# this function, and just call forward.\u001B[39;00m\n\u001B[0;32m   1559\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_pre_hooks\n\u001B[0;32m   1560\u001B[0m         \u001B[38;5;129;01mor\u001B[39;00m _global_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_backward_hooks\n\u001B[0;32m   1561\u001B[0m         \u001B[38;5;129;01mor\u001B[39;00m _global_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_forward_pre_hooks):\n\u001B[1;32m-> 1562\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m forward_call(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m   1564\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m   1565\u001B[0m     result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\accelerate\\hooks.py:170\u001B[0m, in \u001B[0;36madd_hook_to_module.<locals>.new_forward\u001B[1;34m(module, *args, **kwargs)\u001B[0m\n\u001B[0;32m    168\u001B[0m         output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m    169\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 170\u001B[0m     output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m    171\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m module\u001B[38;5;241m.\u001B[39m_hf_hook\u001B[38;5;241m.\u001B[39mpost_forward(module, output)\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:638\u001B[0m, in \u001B[0;36mLlamaSdpaAttention.forward\u001B[1;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position, position_embeddings, **kwargs)\u001B[0m\n\u001B[0;32m    635\u001B[0m     key_states, value_states \u001B[38;5;241m=\u001B[39m past_key_value\u001B[38;5;241m.\u001B[39mupdate(key_states, value_states, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mlayer_idx, cache_kwargs)\n\u001B[0;32m    637\u001B[0m key_states \u001B[38;5;241m=\u001B[39m repeat_kv(key_states, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mnum_key_value_groups)\n\u001B[1;32m--> 638\u001B[0m value_states \u001B[38;5;241m=\u001B[39m \u001B[43mrepeat_kv\u001B[49m\u001B[43m(\u001B[49m\u001B[43mvalue_states\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mnum_key_value_groups\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m    640\u001B[0m causal_mask \u001B[38;5;241m=\u001B[39m attention_mask\n\u001B[0;32m    641\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m attention_mask \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n",
+      "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:324\u001B[0m, in \u001B[0;36mrepeat_kv\u001B[1;34m(hidden_states, n_rep)\u001B[0m\n\u001B[0;32m    322\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m n_rep \u001B[38;5;241m==\u001B[39m \u001B[38;5;241m1\u001B[39m:\n\u001B[0;32m    323\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m hidden_states\n\u001B[1;32m--> 324\u001B[0m hidden_states \u001B[38;5;241m=\u001B[39m \u001B[43mhidden_states\u001B[49m\u001B[43m[\u001B[49m\u001B[43m:\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43m:\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43;01mNone\u001B[39;49;00m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43m:\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43m:\u001B[49m\u001B[43m]\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mexpand\u001B[49m\u001B[43m(\u001B[49m\u001B[43mbatch\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mnum_key_value_heads\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mn_rep\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mslen\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mhead_dim\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m    325\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m hidden_states\u001B[38;5;241m.\u001B[39mreshape(batch, num_key_value_heads \u001B[38;5;241m*\u001B[39m n_rep, slen, head_dim)\n",
+      "\u001B[1;31mKeyboardInterrupt\u001B[0m: "
      ]
     }
    ],
-   "execution_count": 10
+   "execution_count": 9
   }
  ],
  "metadata": {
-- 
GitLab