diff --git a/benchmark/benchmark.ipynb b/benchmark/benchmark.ipynb index ca3e16326606dfb7d3889bb2614835fe8ff33250..e2a29ccf71e939f5b848d29fe13acaa10ed41ef4 100644 --- a/benchmark/benchmark.ipynb +++ b/benchmark/benchmark.ipynb @@ -3,8 +3,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-01-31T11:11:12.365214Z", - "start_time": "2025-01-31T11:11:12.317218Z" + "end_time": "2025-01-31T11:12:14.216466Z", + "start_time": "2025-01-31T11:12:14.202447Z" } }, "cell_type": "code", @@ -30,26 +30,26 @@ ], "id": "9f7c6187bfc81ba6", "outputs": [], - "execution_count": 11 + "execution_count": 1 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-01-31T11:11:14.107618Z", - "start_time": "2025-01-31T11:11:14.098522Z" + "end_time": "2025-01-31T11:12:18.467020Z", + "start_time": "2025-01-31T11:12:18.448483Z" } }, "cell_type": "code", "source": "trait_categories = [\"EXT\", \"EST\", \"AGR\", \"CSN\", \"OPN\"]", "id": "3e451794c0a28fec", "outputs": [], - "execution_count": 12 + "execution_count": 2 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-01-31T11:11:16.998450Z", - "start_time": "2025-01-31T11:11:15.329200Z" + "end_time": "2025-01-31T11:12:29.123538Z", + "start_time": "2025-01-31T11:12:19.224229Z" } }, "cell_type": "code", @@ -92,33 +92,31 @@ "name": "stderr", "output_type": "stream", "text": [ - "The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.\n", - "C:\\Users\\qwert\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\accelerate\\utils\\modeling.py:1390: UserWarning: Current model requires 1856 bytes of buffer for offloaded layers, which seems does not fit any GPU's remaining memory. If you are experiencing a OOM later, please consider using offload_buffers=True.\n", - " warnings.warn(\n" + "The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.\n" ] }, { - "ename": "ValueError", - "evalue": "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 
", - "output_type": "error", - "traceback": [ - "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", - "\u001B[1;31mValueError\u001B[0m Traceback (most recent call last)", - "Cell \u001B[1;32mIn[13], line 11\u001B[0m\n\u001B[0;32m 9\u001B[0m tokenizer \u001B[38;5;241m=\u001B[39m AutoTokenizer\u001B[38;5;241m.\u001B[39mfrom_pretrained(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mmeta-llama/Llama-3.2-3B-Instruct\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m 10\u001B[0m tokenizer\u001B[38;5;241m.\u001B[39mpad_token \u001B[38;5;241m=\u001B[39m tokenizer\u001B[38;5;241m.\u001B[39meos_token\n\u001B[1;32m---> 11\u001B[0m model \u001B[38;5;241m=\u001B[39m \u001B[43mAutoModelForCausalLM\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfrom_pretrained\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 12\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mmeta-llama/Llama-3.2-3B-Instruct\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m 13\u001B[0m \u001B[43m \u001B[49m\u001B[43mload_in_8bit\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mTrue\u001B[39;49;00m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;66;43;03m# This loads the model in 8-bit precision\u001B[39;49;00m\n\u001B[0;32m 14\u001B[0m \u001B[43m \u001B[49m\u001B[43mdevice_map\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mauto\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m \u001B[49m\u001B[38;5;66;43;03m# Automatically assigns the model to available devices\u001B[39;49;00m\n\u001B[0;32m 15\u001B[0m \u001B[43m)\u001B[49m\n\u001B[0;32m 17\u001B[0m custom_pipeline \u001B[38;5;241m=\u001B[39m pipeline(\n\u001B[0;32m 18\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mtext-generation\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[0;32m 19\u001B[0m model\u001B[38;5;241m=\u001B[39mmodel,\n\u001B[0;32m 20\u001B[0m tokenizer\u001B[38;5;241m=\u001B[39mtokenizer,\n\u001B[0;32m 21\u001B[0m )\n\u001B[0;32m 22\u001B[0m terminators \u001B[38;5;241m=\u001B[39m [\n\u001B[0;32m 23\u001B[0m custom_pipeline\u001B[38;5;241m.\u001B[39mtokenizer\u001B[38;5;241m.\u001B[39meos_token_id,\n\u001B[0;32m 24\u001B[0m custom_pipeline\u001B[38;5;241m.\u001B[39mtokenizer\u001B[38;5;241m.\u001B[39mconvert_tokens_to_ids(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m\"\u001B[39m),\n\u001B[0;32m 25\u001B[0m ]\n", - "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\models\\auto\\auto_factory.py:564\u001B[0m, in \u001B[0;36m_BaseAutoModelClass.from_pretrained\u001B[1;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001B[0m\n\u001B[0;32m 562\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28mtype\u001B[39m(config) \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mcls\u001B[39m\u001B[38;5;241m.\u001B[39m_model_mapping\u001B[38;5;241m.\u001B[39mkeys():\n\u001B[0;32m 563\u001B[0m model_class \u001B[38;5;241m=\u001B[39m _get_model_class(config, \u001B[38;5;28mcls\u001B[39m\u001B[38;5;241m.\u001B[39m_model_mapping)\n\u001B[1;32m--> 564\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m model_class\u001B[38;5;241m.\u001B[39mfrom_pretrained(\n\u001B[0;32m 565\u001B[0m pretrained_model_name_or_path, \u001B[38;5;241m*\u001B[39mmodel_args, config\u001B[38;5;241m=\u001B[39mconfig, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mhub_kwargs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs\n\u001B[0;32m 566\u001B[0m 
)\n\u001B[0;32m 567\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\n\u001B[0;32m 568\u001B[0m \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mUnrecognized configuration class \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mconfig\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__class__\u001B[39m\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m for this kind of AutoModel: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;28mcls\u001B[39m\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m.\u001B[39m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 569\u001B[0m \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mModel type should be one of \u001B[39m\u001B[38;5;132;01m{\u001B[39;00m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m, \u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;241m.\u001B[39mjoin(c\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m\u001B[38;5;250m \u001B[39m\u001B[38;5;28;01mfor\u001B[39;00m\u001B[38;5;250m \u001B[39mc\u001B[38;5;250m \u001B[39m\u001B[38;5;129;01min\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;28mcls\u001B[39m\u001B[38;5;241m.\u001B[39m_model_mapping\u001B[38;5;241m.\u001B[39mkeys())\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m.\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 570\u001B[0m )\n", - "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\modeling_utils.py:3963\u001B[0m, in \u001B[0;36mPreTrainedModel.from_pretrained\u001B[1;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001B[0m\n\u001B[0;32m 3960\u001B[0m device_map \u001B[38;5;241m=\u001B[39m infer_auto_device_map(model, dtype\u001B[38;5;241m=\u001B[39mtarget_dtype, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mdevice_map_kwargs)\n\u001B[0;32m 3962\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m hf_quantizer \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m-> 3963\u001B[0m \u001B[43mhf_quantizer\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mvalidate_environment\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdevice_map\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mdevice_map\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 3965\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m device_map \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m 3966\u001B[0m model\u001B[38;5;241m.\u001B[39mtie_weights()\n", - "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\quantizers\\quantizer_bnb_8bit.py:101\u001B[0m, in \u001B[0;36mBnb8BitHfQuantizer.validate_environment\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m 99\u001B[0m \u001B[38;5;28;01mpass\u001B[39;00m\n\u001B[0;32m 100\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcpu\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01min\u001B[39;00m device_map_without_lm_head\u001B[38;5;241m.\u001B[39mvalues() \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mdisk\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01min\u001B[39;00m device_map_without_lm_head\u001B[38;5;241m.\u001B[39mvalues():\n\u001B[1;32m--> 101\u001B[0m 
\u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\n\u001B[0;32m 102\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mSome modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 103\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mquantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 104\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124min 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 105\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m`from_pretrained`. Check \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 106\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mhttps://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 107\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mfor more details. \u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 108\u001B[0m )\n\u001B[0;32m 110\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m version\u001B[38;5;241m.\u001B[39mparse(importlib\u001B[38;5;241m.\u001B[39mmetadata\u001B[38;5;241m.\u001B[39mversion(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mbitsandbytes\u001B[39m\u001B[38;5;124m\"\u001B[39m)) \u001B[38;5;241m<\u001B[39m version\u001B[38;5;241m.\u001B[39mparse(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m0.37.2\u001B[39m\u001B[38;5;124m\"\u001B[39m):\n\u001B[0;32m 111\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\n\u001B[0;32m 112\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mYou have a version of `bitsandbytes` that is not compatible with 8bit inference and training\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 113\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m make sure you have the latest version of `bitsandbytes` installed\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 114\u001B[0m )\n", - "\u001B[1;31mValueError\u001B[0m: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 
" - ] + "data": { + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "f91c638e4399428d8f3b321cc1ae071d" + } + }, + "metadata": {}, + "output_type": "display_data" } ], - "execution_count": 13 + "execution_count": 3 }, { "metadata": { "ExecuteTime": { - "end_time": "2025-01-31T10:58:27.934353Z", - "start_time": "2025-01-31T10:58:27.924834Z" + "end_time": "2025-01-31T11:12:32.705652Z", + "start_time": "2025-01-31T11:12:32.701140Z" } }, "cell_type": "code", @@ -144,8 +142,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-01-31T10:58:29.489756Z", - "start_time": "2025-01-31T10:58:29.461648Z" + "end_time": "2025-01-31T11:12:33.935985Z", + "start_time": "2025-01-31T11:12:33.917759Z" } }, "cell_type": "code", @@ -207,8 +205,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-01-31T11:00:58.310059Z", - "start_time": "2025-01-31T11:00:58.272535Z" + "end_time": "2025-01-31T11:12:36.111618Z", + "start_time": "2025-01-31T11:12:36.089989Z" } }, "cell_type": "code", @@ -316,39 +314,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-01-31T11:01:00.823080Z", - "start_time": "2025-01-31T11:01:00.816079Z" - } - }, - "cell_type": "code", - "source": [ - "def eval_trait(\n", - " trait,\n", - " num_statements,\n", - " mode,\n", - " samples,\n", - " batches,\n", - " optimized_prompt\n", - "):\n", - " trait_eval_results = []\n", - "\n", - " for i in range(0, batches):\n", - "\n", - " prediction_true_arr = eval_llm(samples, trait, num_statements, mode)\n", - " trait_eval_results = [samples, trait, num_statements, prediction_true_arr]\n", - " print(\"Batch: \" + (i+1).__str__() + \" Prompts: \" + samples.__str__())\n", - "\n", - " return trait_eval_results" - ], - "id": "5133362b9d7d2863", - "outputs": [], - "execution_count": 7 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-01-31T11:01:02.328168Z", - "start_time": "2025-01-31T11:01:02.310961Z" + "end_time": "2025-01-31T11:13:09.666818Z", + "start_time": "2025-01-31T11:13:09.648801Z" } }, "cell_type": "code", @@ -371,8 +338,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-01-31T11:04:35.503990Z", - "start_time": "2025-01-31T11:01:11.569206Z" + "end_time": "2025-01-31T11:13:13.939212Z", + "start_time": "2025-01-31T11:13:10.754886Z" } }, "cell_type": "code", @@ -440,31 +407,53 @@ "C:\\Users\\qwert\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\generation\\configuration_utils.py:601: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.6` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n", " warnings.warn(\n", "C:\\Users\\qwert\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\generation\\configuration_utils.py:606: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n", - " warnings.warn(\n" + " warnings.warn(\n", + "C:\\Users\\qwert\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:655: UserWarning: 1Torch was not compiled with flash attention. 
(Triggered internally at C:\\actions-runner\\_work\\pytorch\\pytorch\\builder\\windows\\pytorch\\aten\\src\\ATen\\native\\transformers\\cuda\\sdp_utils.cpp:555.)\n", + " attn_output = torch.nn.functional.scaled_dot_product_attention(\n", + "Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "LLM: 1 Mensch: 5 | Dfs:EXT Entry:21\n", - "Based on the provided scale, predict the person's response to the new statement considering their previous answers.\n", - "----------------\n", - "LLM: 1 Mensch: 5 | Dfs:EST Entry:16\n", - "Based on the provided scale, predict the person's response to the new statement considering their previous answers.\n", - "----------------\n", - "LLM: 1 Mensch: 5 | Dfs:AGR Entry:58\n", - "Based on the provided scale, predict the person's response to the new statement considering their previous answers.\n", - "----------------\n", - "LLM: 1 Mensch: 5 | Dfs:OPN Entry:13\n", - "Based on the provided scale, predict the person's response to the new statement considering their previous answers.\n", - "----------------\n", - "Failed iterations: 0\n", - "Accuracy: 34.02298850574713\n" + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[1;31mKeyboardInterrupt\u001B[0m Traceback (most recent call last)", + "Cell \u001B[1;32mIn[9], line 8\u001B[0m\n\u001B[0;32m 4\u001B[0m importlib\u001B[38;5;241m.\u001B[39mreload(OtherUtils)\n\u001B[0;32m 6\u001B[0m prompt \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mBased on the provided scale, predict the person\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124ms response to the new statement considering their previous answers.\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[1;32m----> 8\u001B[0m individual_result \u001B[38;5;241m=\u001B[39m \u001B[43meval_random_trait\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 9\u001B[0m \u001B[43m \u001B[49m\u001B[43mnum_statements\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43m \u001B[49m\u001B[38;5;241;43m5\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m 10\u001B[0m \u001B[43m \u001B[49m\u001B[43mmode\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43m \u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mnumbers\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m 11\u001B[0m \u001B[43m \u001B[49m\u001B[43moptimized_prompt\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43m \u001B[49m\u001B[43mprompt\u001B[49m\n\u001B[0;32m 12\u001B[0m \u001B[43m)\u001B[49m\n\u001B[0;32m 13\u001B[0m acc \u001B[38;5;241m=\u001B[39m Plots\u001B[38;5;241m.\u001B[39mget_accuracy(individual_result)\n\u001B[0;32m 14\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mAccuracy: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00macc\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m)\n", + "Cell \u001B[1;32mIn[8], line 7\u001B[0m, in \u001B[0;36meval_random_trait\u001B[1;34m(num_statements, mode, optimized_prompt)\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21meval_random_trait\u001B[39m(\n\u001B[0;32m 2\u001B[0m num_statements,\n\u001B[0;32m 3\u001B[0m mode,\n\u001B[0;32m 4\u001B[0m 
optimized_prompt\n\u001B[0;32m 5\u001B[0m ):\n\u001B[0;32m 6\u001B[0m eval_results \u001B[38;5;241m=\u001B[39m []\n\u001B[1;32m----> 7\u001B[0m prediction_true_arr \u001B[38;5;241m=\u001B[39m \u001B[43meval_llm\u001B[49m\u001B[43m(\u001B[49m\u001B[43mnum_statements\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmode\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43moptimized_prompt\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 8\u001B[0m eval_results \u001B[38;5;241m=\u001B[39m [num_statements, prediction_true_arr]\n\u001B[0;32m 10\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m eval_results\n", + "Cell \u001B[1;32mIn[6], line 56\u001B[0m, in \u001B[0;36meval_llm\u001B[1;34m(num_statements, mode, optimized_prompt)\u001B[0m\n\u001B[0;32m 53\u001B[0m current_question \u001B[38;5;241m=\u001B[39m personality_traits[field_of_interest_formatted]\n\u001B[0;32m 54\u001B[0m human_answer \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mint\u001B[39m(entry[\u001B[38;5;241m1\u001B[39m][field_of_interest_formatted])\n\u001B[1;32m---> 56\u001B[0m llm_ans \u001B[38;5;241m=\u001B[39m \u001B[43mprompt_llm\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 57\u001B[0m \u001B[43m \u001B[49m\u001B[43mpersonality_statements\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 58\u001B[0m \u001B[43m \u001B[49m\u001B[43mcurrent_question\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 59\u001B[0m \u001B[43m \u001B[49m\u001B[43mnum_statements\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 60\u001B[0m \u001B[43m \u001B[49m\u001B[43mprev_questions\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 61\u001B[0m \u001B[43m \u001B[49m\u001B[43mprev_answers\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 62\u001B[0m \u001B[43m \u001B[49m\u001B[43mmode\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 63\u001B[0m \u001B[43m \u001B[49m\u001B[43moptimized_prompt\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 64\u001B[0m \u001B[43m \u001B[49m\u001B[43mprint_prompt\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;129;43;01mnot\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mfirst_printed\u001B[49m\n\u001B[0;32m 65\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 67\u001B[0m first_printed \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m\n\u001B[0;32m 69\u001B[0m \u001B[38;5;66;03m#print(f\"LLM: {llm_ans} Mensch: {human_answer}\")\u001B[39;00m\n", + "Cell \u001B[1;32mIn[5], line 40\u001B[0m, in \u001B[0;36mprompt_llm\u001B[1;34m(personality_statement, current_question, num_statements, prev_questions, prev_answers, mode, optimized_prompt, print_prompt)\u001B[0m\n\u001B[0;32m 36\u001B[0m prompt \u001B[38;5;241m=\u001B[39m custom_chat_template(prompt)\n\u001B[0;32m 38\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m print_prompt: \u001B[38;5;28mprint\u001B[39m(prompt)\n\u001B[1;32m---> 40\u001B[0m outputs \u001B[38;5;241m=\u001B[39m \u001B[43mcustom_pipeline\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 41\u001B[0m \u001B[43m \u001B[49m\u001B[43mprompt\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 42\u001B[0m \u001B[43m \u001B[49m\u001B[43mmax_new_tokens\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;241;43m20\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m 43\u001B[0m \u001B[43m \u001B[49m\u001B[43meos_token_id\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcustom_pipeline\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtokenizer\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43meos_token_id\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 44\u001B[0m \u001B[43m 
\u001B[49m\u001B[43mdo_sample\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mFalse\u001B[39;49;00m\n\u001B[0;32m 45\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 47\u001B[0m response \u001B[38;5;241m=\u001B[39m outputs[\u001B[38;5;241m0\u001B[39m][\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mgenerated_text\u001B[39m\u001B[38;5;124m\"\u001B[39m][\u001B[38;5;28mlen\u001B[39m(prompt):]\n\u001B[0;32m 49\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m response\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\pipelines\\text_generation.py:272\u001B[0m, in \u001B[0;36mTextGenerationPipeline.__call__\u001B[1;34m(self, text_inputs, **kwargs)\u001B[0m\n\u001B[0;32m 270\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28msuper\u001B[39m()\u001B[38;5;241m.\u001B[39m\u001B[38;5;21m__call__\u001B[39m(chats, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m 271\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 272\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28msuper\u001B[39m()\u001B[38;5;241m.\u001B[39m\u001B[38;5;21m__call__\u001B[39m(text_inputs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\pipelines\\base.py:1268\u001B[0m, in \u001B[0;36mPipeline.__call__\u001B[1;34m(self, inputs, num_workers, batch_size, *args, **kwargs)\u001B[0m\n\u001B[0;32m 1260\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mnext\u001B[39m(\n\u001B[0;32m 1261\u001B[0m \u001B[38;5;28miter\u001B[39m(\n\u001B[0;32m 1262\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mget_iterator(\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 1265\u001B[0m )\n\u001B[0;32m 1266\u001B[0m )\n\u001B[0;32m 1267\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m-> 1268\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrun_single\u001B[49m\u001B[43m(\u001B[49m\u001B[43minputs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mpreprocess_params\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mforward_params\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mpostprocess_params\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\pipelines\\base.py:1275\u001B[0m, in \u001B[0;36mPipeline.run_single\u001B[1;34m(self, inputs, preprocess_params, forward_params, postprocess_params)\u001B[0m\n\u001B[0;32m 1273\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mrun_single\u001B[39m(\u001B[38;5;28mself\u001B[39m, inputs, preprocess_params, forward_params, postprocess_params):\n\u001B[0;32m 1274\u001B[0m model_inputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mpreprocess(inputs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mpreprocess_params)\n\u001B[1;32m-> 1275\u001B[0m model_outputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mforward(model_inputs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mforward_params)\n\u001B[0;32m 1276\u001B[0m outputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mpostprocess(model_outputs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mpostprocess_params)\n\u001B[0;32m 1277\u001B[0m 
\u001B[38;5;28;01mreturn\u001B[39;00m outputs\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\pipelines\\base.py:1175\u001B[0m, in \u001B[0;36mPipeline.forward\u001B[1;34m(self, model_inputs, **forward_params)\u001B[0m\n\u001B[0;32m 1173\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m inference_context():\n\u001B[0;32m 1174\u001B[0m model_inputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_ensure_tensor_on_device(model_inputs, device\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdevice)\n\u001B[1;32m-> 1175\u001B[0m model_outputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward(model_inputs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mforward_params)\n\u001B[0;32m 1176\u001B[0m model_outputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_ensure_tensor_on_device(model_outputs, device\u001B[38;5;241m=\u001B[39mtorch\u001B[38;5;241m.\u001B[39mdevice(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcpu\u001B[39m\u001B[38;5;124m\"\u001B[39m))\n\u001B[0;32m 1177\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\pipelines\\text_generation.py:370\u001B[0m, in \u001B[0;36mTextGenerationPipeline._forward\u001B[1;34m(self, model_inputs, **generate_kwargs)\u001B[0m\n\u001B[0;32m 367\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mgeneration_config\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;129;01min\u001B[39;00m generate_kwargs:\n\u001B[0;32m 368\u001B[0m generate_kwargs[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mgeneration_config\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mgeneration_config\n\u001B[1;32m--> 370\u001B[0m generated_sequence \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmodel\u001B[38;5;241m.\u001B[39mgenerate(input_ids\u001B[38;5;241m=\u001B[39minput_ids, attention_mask\u001B[38;5;241m=\u001B[39mattention_mask, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mgenerate_kwargs)\n\u001B[0;32m 371\u001B[0m out_b \u001B[38;5;241m=\u001B[39m generated_sequence\u001B[38;5;241m.\u001B[39mshape[\u001B[38;5;241m0\u001B[39m]\n\u001B[0;32m 372\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mframework \u001B[38;5;241m==\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mpt\u001B[39m\u001B[38;5;124m\"\u001B[39m:\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\utils\\_contextlib.py:116\u001B[0m, in \u001B[0;36mcontext_decorator.<locals>.decorate_context\u001B[1;34m(*args, **kwargs)\u001B[0m\n\u001B[0;32m 113\u001B[0m \u001B[38;5;129m@functools\u001B[39m\u001B[38;5;241m.\u001B[39mwraps(func)\n\u001B[0;32m 114\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mdecorate_context\u001B[39m(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs):\n\u001B[0;32m 115\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m ctx_factory():\n\u001B[1;32m--> 116\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m func(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n", + "File 
\u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\generation\\utils.py:2047\u001B[0m, in \u001B[0;36mGenerationMixin.generate\u001B[1;34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)\u001B[0m\n\u001B[0;32m 2039\u001B[0m input_ids, model_kwargs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_expand_inputs_for_generation(\n\u001B[0;32m 2040\u001B[0m input_ids\u001B[38;5;241m=\u001B[39minput_ids,\n\u001B[0;32m 2041\u001B[0m expand_size\u001B[38;5;241m=\u001B[39mgeneration_config\u001B[38;5;241m.\u001B[39mnum_return_sequences,\n\u001B[0;32m 2042\u001B[0m is_encoder_decoder\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mconfig\u001B[38;5;241m.\u001B[39mis_encoder_decoder,\n\u001B[0;32m 2043\u001B[0m \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mmodel_kwargs,\n\u001B[0;32m 2044\u001B[0m )\n\u001B[0;32m 2046\u001B[0m \u001B[38;5;66;03m# 12. run sample (it degenerates to greedy search when `generation_config.do_sample=False`)\u001B[39;00m\n\u001B[1;32m-> 2047\u001B[0m result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_sample(\n\u001B[0;32m 2048\u001B[0m input_ids,\n\u001B[0;32m 2049\u001B[0m logits_processor\u001B[38;5;241m=\u001B[39mprepared_logits_processor,\n\u001B[0;32m 2050\u001B[0m stopping_criteria\u001B[38;5;241m=\u001B[39mprepared_stopping_criteria,\n\u001B[0;32m 2051\u001B[0m generation_config\u001B[38;5;241m=\u001B[39mgeneration_config,\n\u001B[0;32m 2052\u001B[0m synced_gpus\u001B[38;5;241m=\u001B[39msynced_gpus,\n\u001B[0;32m 2053\u001B[0m streamer\u001B[38;5;241m=\u001B[39mstreamer,\n\u001B[0;32m 2054\u001B[0m \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mmodel_kwargs,\n\u001B[0;32m 2055\u001B[0m )\n\u001B[0;32m 2057\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m generation_mode \u001B[38;5;129;01min\u001B[39;00m (GenerationMode\u001B[38;5;241m.\u001B[39mBEAM_SAMPLE, GenerationMode\u001B[38;5;241m.\u001B[39mBEAM_SEARCH):\n\u001B[0;32m 2058\u001B[0m \u001B[38;5;66;03m# 11. 
prepare beam search scorer\u001B[39;00m\n\u001B[0;32m 2059\u001B[0m beam_scorer \u001B[38;5;241m=\u001B[39m BeamSearchScorer(\n\u001B[0;32m 2060\u001B[0m batch_size\u001B[38;5;241m=\u001B[39mbatch_size,\n\u001B[0;32m 2061\u001B[0m num_beams\u001B[38;5;241m=\u001B[39mgeneration_config\u001B[38;5;241m.\u001B[39mnum_beams,\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 2066\u001B[0m max_length\u001B[38;5;241m=\u001B[39mgeneration_config\u001B[38;5;241m.\u001B[39mmax_length,\n\u001B[0;32m 2067\u001B[0m )\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\generation\\utils.py:3007\u001B[0m, in \u001B[0;36mGenerationMixin._sample\u001B[1;34m(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)\u001B[0m\n\u001B[0;32m 3004\u001B[0m model_inputs\u001B[38;5;241m.\u001B[39mupdate({\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124moutput_hidden_states\u001B[39m\u001B[38;5;124m\"\u001B[39m: output_hidden_states} \u001B[38;5;28;01mif\u001B[39;00m output_hidden_states \u001B[38;5;28;01melse\u001B[39;00m {})\n\u001B[0;32m 3006\u001B[0m \u001B[38;5;66;03m# forward pass to get next token\u001B[39;00m\n\u001B[1;32m-> 3007\u001B[0m outputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m(\u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mmodel_inputs, return_dict\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m)\n\u001B[0;32m 3009\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m synced_gpus \u001B[38;5;129;01mand\u001B[39;00m this_peer_finished:\n\u001B[0;32m 3010\u001B[0m \u001B[38;5;28;01mcontinue\u001B[39;00m \u001B[38;5;66;03m# don't waste resources running the code we don't need\u001B[39;00m\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1553\u001B[0m, in \u001B[0;36mModule._wrapped_call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m 1551\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_compiled_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs) \u001B[38;5;66;03m# type: ignore[misc]\u001B[39;00m\n\u001B[0;32m 1552\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m-> 1553\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1562\u001B[0m, in \u001B[0;36mModule._call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m 1557\u001B[0m \u001B[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001B[39;00m\n\u001B[0;32m 1558\u001B[0m \u001B[38;5;66;03m# this function, and just call forward.\u001B[39;00m\n\u001B[0;32m 1559\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_pre_hooks\n\u001B[0;32m 1560\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_backward_pre_hooks 
\u001B[38;5;129;01mor\u001B[39;00m _global_backward_hooks\n\u001B[0;32m 1561\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_forward_pre_hooks):\n\u001B[1;32m-> 1562\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m forward_call(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m 1564\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m 1565\u001B[0m result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\accelerate\\hooks.py:170\u001B[0m, in \u001B[0;36madd_hook_to_module.<locals>.new_forward\u001B[1;34m(module, *args, **kwargs)\u001B[0m\n\u001B[0;32m 168\u001B[0m output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m 169\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 170\u001B[0m output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m 171\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m module\u001B[38;5;241m.\u001B[39m_hf_hook\u001B[38;5;241m.\u001B[39mpost_forward(module, output)\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:1189\u001B[0m, in \u001B[0;36mLlamaForCausalLM.forward\u001B[1;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, cache_position, num_logits_to_keep)\u001B[0m\n\u001B[0;32m 1186\u001B[0m return_dict \u001B[38;5;241m=\u001B[39m return_dict \u001B[38;5;28;01mif\u001B[39;00m return_dict \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;28;01melse\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mconfig\u001B[38;5;241m.\u001B[39muse_return_dict\n\u001B[0;32m 1188\u001B[0m \u001B[38;5;66;03m# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)\u001B[39;00m\n\u001B[1;32m-> 1189\u001B[0m outputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmodel\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 1190\u001B[0m \u001B[43m \u001B[49m\u001B[43minput_ids\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minput_ids\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1191\u001B[0m \u001B[43m \u001B[49m\u001B[43mattention_mask\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mattention_mask\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1192\u001B[0m \u001B[43m \u001B[49m\u001B[43mposition_ids\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mposition_ids\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1193\u001B[0m \u001B[43m \u001B[49m\u001B[43mpast_key_values\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mpast_key_values\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1194\u001B[0m \u001B[43m \u001B[49m\u001B[43minputs_embeds\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minputs_embeds\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1195\u001B[0m \u001B[43m \u001B[49m\u001B[43muse_cache\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43muse_cache\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1196\u001B[0m \u001B[43m 
\u001B[49m\u001B[43moutput_attentions\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43moutput_attentions\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1197\u001B[0m \u001B[43m \u001B[49m\u001B[43moutput_hidden_states\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43moutput_hidden_states\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1198\u001B[0m \u001B[43m \u001B[49m\u001B[43mreturn_dict\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mreturn_dict\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1199\u001B[0m \u001B[43m \u001B[49m\u001B[43mcache_position\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcache_position\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1200\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 1202\u001B[0m hidden_states \u001B[38;5;241m=\u001B[39m outputs[\u001B[38;5;241m0\u001B[39m]\n\u001B[0;32m 1203\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mconfig\u001B[38;5;241m.\u001B[39mpretraining_tp \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m1\u001B[39m:\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1553\u001B[0m, in \u001B[0;36mModule._wrapped_call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m 1551\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_compiled_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs) \u001B[38;5;66;03m# type: ignore[misc]\u001B[39;00m\n\u001B[0;32m 1552\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m-> 1553\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1562\u001B[0m, in \u001B[0;36mModule._call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m 1557\u001B[0m \u001B[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001B[39;00m\n\u001B[0;32m 1558\u001B[0m \u001B[38;5;66;03m# this function, and just call forward.\u001B[39;00m\n\u001B[0;32m 1559\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_pre_hooks\n\u001B[0;32m 1560\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_backward_hooks\n\u001B[0;32m 1561\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_forward_pre_hooks):\n\u001B[1;32m-> 1562\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m forward_call(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m 1564\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m 1565\u001B[0m result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\accelerate\\hooks.py:170\u001B[0m, 
in \u001B[0;36madd_hook_to_module.<locals>.new_forward\u001B[1;34m(module, *args, **kwargs)\u001B[0m\n\u001B[0;32m 168\u001B[0m output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m 169\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 170\u001B[0m output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m 171\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m module\u001B[38;5;241m.\u001B[39m_hf_hook\u001B[38;5;241m.\u001B[39mpost_forward(module, output)\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:1000\u001B[0m, in \u001B[0;36mLlamaModel.forward\u001B[1;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, cache_position)\u001B[0m\n\u001B[0;32m 988\u001B[0m layer_outputs \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_gradient_checkpointing_func(\n\u001B[0;32m 989\u001B[0m decoder_layer\u001B[38;5;241m.\u001B[39m\u001B[38;5;21m__call__\u001B[39m,\n\u001B[0;32m 990\u001B[0m hidden_states,\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 997\u001B[0m position_embeddings,\n\u001B[0;32m 998\u001B[0m )\n\u001B[0;32m 999\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m-> 1000\u001B[0m layer_outputs \u001B[38;5;241m=\u001B[39m \u001B[43mdecoder_layer\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 1001\u001B[0m \u001B[43m \u001B[49m\u001B[43mhidden_states\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1002\u001B[0m \u001B[43m \u001B[49m\u001B[43mattention_mask\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcausal_mask\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1003\u001B[0m \u001B[43m \u001B[49m\u001B[43mposition_ids\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mposition_ids\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1004\u001B[0m \u001B[43m \u001B[49m\u001B[43mpast_key_value\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mpast_key_values\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1005\u001B[0m \u001B[43m \u001B[49m\u001B[43moutput_attentions\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43moutput_attentions\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1006\u001B[0m \u001B[43m \u001B[49m\u001B[43muse_cache\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43muse_cache\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1007\u001B[0m \u001B[43m \u001B[49m\u001B[43mcache_position\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcache_position\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1008\u001B[0m \u001B[43m \u001B[49m\u001B[43mposition_embeddings\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mposition_embeddings\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1009\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 1011\u001B[0m hidden_states \u001B[38;5;241m=\u001B[39m layer_outputs[\u001B[38;5;241m0\u001B[39m]\n\u001B[0;32m 1013\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m use_cache:\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1553\u001B[0m, in \u001B[0;36mModule._wrapped_call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m 1551\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m 
\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_compiled_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs) \u001B[38;5;66;03m# type: ignore[misc]\u001B[39;00m\n\u001B[0;32m 1552\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m-> 1553\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1562\u001B[0m, in \u001B[0;36mModule._call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m 1557\u001B[0m \u001B[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001B[39;00m\n\u001B[0;32m 1558\u001B[0m \u001B[38;5;66;03m# this function, and just call forward.\u001B[39;00m\n\u001B[0;32m 1559\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_pre_hooks\n\u001B[0;32m 1560\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_backward_hooks\n\u001B[0;32m 1561\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_forward_pre_hooks):\n\u001B[1;32m-> 1562\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m forward_call(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m 1564\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m 1565\u001B[0m result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\accelerate\\hooks.py:170\u001B[0m, in \u001B[0;36madd_hook_to_module.<locals>.new_forward\u001B[1;34m(module, *args, **kwargs)\u001B[0m\n\u001B[0;32m 168\u001B[0m output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m 169\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 170\u001B[0m output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m 171\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m module\u001B[38;5;241m.\u001B[39m_hf_hook\u001B[38;5;241m.\u001B[39mpost_forward(module, output)\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:729\u001B[0m, in \u001B[0;36mLlamaDecoderLayer.forward\u001B[1;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position, position_embeddings, **kwargs)\u001B[0m\n\u001B[0;32m 726\u001B[0m hidden_states \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39minput_layernorm(hidden_states)\n\u001B[0;32m 728\u001B[0m \u001B[38;5;66;03m# Self Attention\u001B[39;00m\n\u001B[1;32m--> 
729\u001B[0m hidden_states, self_attn_weights, present_key_value \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mself_attn(\n\u001B[0;32m 730\u001B[0m hidden_states\u001B[38;5;241m=\u001B[39mhidden_states,\n\u001B[0;32m 731\u001B[0m attention_mask\u001B[38;5;241m=\u001B[39mattention_mask,\n\u001B[0;32m 732\u001B[0m position_ids\u001B[38;5;241m=\u001B[39mposition_ids,\n\u001B[0;32m 733\u001B[0m past_key_value\u001B[38;5;241m=\u001B[39mpast_key_value,\n\u001B[0;32m 734\u001B[0m output_attentions\u001B[38;5;241m=\u001B[39moutput_attentions,\n\u001B[0;32m 735\u001B[0m use_cache\u001B[38;5;241m=\u001B[39muse_cache,\n\u001B[0;32m 736\u001B[0m cache_position\u001B[38;5;241m=\u001B[39mcache_position,\n\u001B[0;32m 737\u001B[0m position_embeddings\u001B[38;5;241m=\u001B[39mposition_embeddings,\n\u001B[0;32m 738\u001B[0m \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs,\n\u001B[0;32m 739\u001B[0m )\n\u001B[0;32m 740\u001B[0m hidden_states \u001B[38;5;241m=\u001B[39m residual \u001B[38;5;241m+\u001B[39m hidden_states\n\u001B[0;32m 742\u001B[0m \u001B[38;5;66;03m# Fully Connected\u001B[39;00m\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1553\u001B[0m, in \u001B[0;36mModule._wrapped_call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m 1551\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_compiled_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs) \u001B[38;5;66;03m# type: ignore[misc]\u001B[39;00m\n\u001B[0;32m 1552\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m-> 1553\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_call_impl(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1562\u001B[0m, in \u001B[0;36mModule._call_impl\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m 1557\u001B[0m \u001B[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001B[39;00m\n\u001B[0;32m 1558\u001B[0m \u001B[38;5;66;03m# this function, and just call forward.\u001B[39;00m\n\u001B[0;32m 1559\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m (\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_forward_pre_hooks\n\u001B[0;32m 1560\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_backward_pre_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_backward_hooks\n\u001B[0;32m 1561\u001B[0m \u001B[38;5;129;01mor\u001B[39;00m _global_forward_hooks \u001B[38;5;129;01mor\u001B[39;00m _global_forward_pre_hooks):\n\u001B[1;32m-> 1562\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m forward_call(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m 1564\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m 1565\u001B[0m result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n", + "File 
\u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\accelerate\\hooks.py:170\u001B[0m, in \u001B[0;36madd_hook_to_module.<locals>.new_forward\u001B[1;34m(module, *args, **kwargs)\u001B[0m\n\u001B[0;32m 168\u001B[0m output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m 169\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 170\u001B[0m output \u001B[38;5;241m=\u001B[39m module\u001B[38;5;241m.\u001B[39m_old_forward(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m 171\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m module\u001B[38;5;241m.\u001B[39m_hf_hook\u001B[38;5;241m.\u001B[39mpost_forward(module, output)\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:638\u001B[0m, in \u001B[0;36mLlamaSdpaAttention.forward\u001B[1;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position, position_embeddings, **kwargs)\u001B[0m\n\u001B[0;32m 635\u001B[0m key_states, value_states \u001B[38;5;241m=\u001B[39m past_key_value\u001B[38;5;241m.\u001B[39mupdate(key_states, value_states, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mlayer_idx, cache_kwargs)\n\u001B[0;32m 637\u001B[0m key_states \u001B[38;5;241m=\u001B[39m repeat_kv(key_states, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mnum_key_value_groups)\n\u001B[1;32m--> 638\u001B[0m value_states \u001B[38;5;241m=\u001B[39m \u001B[43mrepeat_kv\u001B[49m\u001B[43m(\u001B[49m\u001B[43mvalue_states\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mnum_key_value_groups\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 640\u001B[0m causal_mask \u001B[38;5;241m=\u001B[39m attention_mask\n\u001B[0;32m 641\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m attention_mask \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n", + "File \u001B[1;32m~\\PycharmProjects\\BachelorArbeit\\.venv\\lib\\site-packages\\transformers\\models\\llama\\modeling_llama.py:324\u001B[0m, in \u001B[0;36mrepeat_kv\u001B[1;34m(hidden_states, n_rep)\u001B[0m\n\u001B[0;32m 322\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m n_rep \u001B[38;5;241m==\u001B[39m \u001B[38;5;241m1\u001B[39m:\n\u001B[0;32m 323\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m hidden_states\n\u001B[1;32m--> 324\u001B[0m hidden_states \u001B[38;5;241m=\u001B[39m \u001B[43mhidden_states\u001B[49m\u001B[43m[\u001B[49m\u001B[43m:\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43m:\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43;01mNone\u001B[39;49;00m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43m:\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43m:\u001B[49m\u001B[43m]\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mexpand\u001B[49m\u001B[43m(\u001B[49m\u001B[43mbatch\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mnum_key_value_heads\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mn_rep\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mslen\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mhead_dim\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 325\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m 
hidden_states\u001B[38;5;241m.\u001B[39mreshape(batch, num_key_value_heads \u001B[38;5;241m*\u001B[39m n_rep, slen, head_dim)\n", + "\u001B[1;31mKeyboardInterrupt\u001B[0m: " ] } ], - "execution_count": 10 + "execution_count": 9 } ], "metadata": {
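For reference (not part of the diff itself): the stderr warning kept in the new output deprecates `load_in_8bit=True` in favour of a `BitsAndBytesConfig`, and the `ValueError` removed by this diff points at the CPU-offload flag. Below is a minimal sketch of that loading path, assuming the model id and pipeline settings visible in the tracebacks (`meta-llama/Llama-3.2-3B-Instruct`, `device_map="auto"`, greedy decoding with `max_new_tokens=20`); `llm_int8_enable_fp32_cpu_offload` is, to my knowledge, the config-level counterpart of the `load_in_8bit_fp32_cpu_offload` kwarg named in the old error message, so treat the exact flag as an assumption rather than a confirmed fix.

```python
# Sketch only: not the notebook's code, just the loading path the warnings above describe.
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

model_id = "meta-llama/Llama-3.2-3B-Instruct"  # model id taken from the traceback

# Replaces the deprecated `load_in_8bit=True` argument to `from_pretrained`.
# `llm_int8_enable_fp32_cpu_offload=True` is only needed if some layers must spill
# to CPU/disk, as in the old failing run (assumed equivalent of the flag named in
# the removed ValueError).
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token  # as in the notebook cell shown in the traceback

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,  # instead of load_in_8bit=True
    device_map="auto",               # lets accelerate place layers on the available devices
)

custom_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Greedy decoding: leave `temperature`/`top_p` unset so the `do_sample=False`
# UserWarnings seen in the output do not fire.
outputs = custom_pipeline("Hello", max_new_tokens=20, do_sample=False)
```

Leaving `temperature` and `top_p` unset while `do_sample=False` would also silence the two `configuration_utils.py` UserWarnings that appear in both the old and new cell outputs.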