diff --git a/SleepStageModels.ipynb b/SleepStageModels.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..d0f6cd83e0e2ca1f1481b2b8341b88824b422076 --- /dev/null +++ b/SleepStageModels.ipynb @@ -0,0 +1,858 @@ +{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "L300ySZBqbyH"
+   },
+   "source": [
+    "# Installation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "YSztofTQiTt7"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install pyedflib\n",
+    "!pip install numpy\n",
+    "!pip install xmltodict\n",
+    "!pip install mne\n",
+    "!pip install tensorflow\n",
+    "!pip install pandas\n",
+    "!pip install scikit-learn\n",
+    "!pip install hampel\n",
+    "!pip install keras-tuner"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "D8J3VziBqm-p"
+   },
+   "source": [
+    "# Prepare data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "2X2HY602mIwc",
+    "outputId": "1a01f8a7-71ab-4241-96fa-57cef60ed907"
+   },
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Mounted at /content/drive\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Mount Google Drive\n",
+    "\n",
+    "from google.colab import drive\n",
+    "drive.mount('/content/drive')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "id": "n4FwOJ0Dm7ro"
+   },
+   "outputs": [],
+   "source": [
+    "# Create folders to store the data from Google Drive\n",
+    "import os\n",
+    "\n",
+    "path_to_edf_files = 'edf_files'\n",
+    "if not os.path.exists(path_to_edf_files):\n",
+    "    os.mkdir(path_to_edf_files)\n",
+    "\n",
+    "path_to_annotations = 'annotations'  # '/content/drive/My Drive/annotations'\n",
+    "if not os.path.exists(path_to_annotations):\n",
+    "    os.mkdir(path_to_annotations)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "id": "v3vZ-kppi4DA"
+   },
+   "outputs": [],
+   "source": [
+    "import shutil\n",
+    "# Be careful not to delete the folder by accident (always comment this out again after running it)\n",
+    "#shutil.rmtree(path_to_annotations)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "hKFNmDrEmNpy"
+   },
+   "outputs": [],
+   "source": [
+    "import zipfile\n",
+    "\n",
+    "# Info: all EDF and annotation files are stored in multiple zip files in Google Drive.\n",
+    "#   Root folder for EDF files is path_to_all_edf_zip_folder\n",
+    "#   Root folder for annotation files is path_to_all_annotation_zip_folder\n",
+    "\n",
+    "path_to_all_edf_zip_folder = '/content/drive/My Drive/shhs2_edf_zip'\n",
+    "path_to_all_annotation_zip_folder = '/content/drive/My Drive/shhs2_annotation_zip'\n",
+    "\n",
+    "\n",
+    "# Unzip all files from path_to_all_edf_zip_folder into the folder 'edf_files'\n",
+    "for filename in os.listdir(path_to_all_edf_zip_folder):\n",
+    "    if filename.endswith('.zip'):\n",
+    "        zip_path = os.path.join(path_to_all_edf_zip_folder, filename)\n",
+    "\n",
+    "        # Open the zip file and extract it into path_to_edf_files\n",
+    "        with zipfile.ZipFile(zip_path, 'r') as zip_ref:\n",
+    "            print(zip_path)\n",
+    "            !unzip \"$zip_path\" -d \"$path_to_edf_files\"\n",
+    "\n",
+    "        print(f'Extracted: {filename}')\n",
+    "\n",
+    "\n",
+    "# Unzip all files from path_to_all_annotation_zip_folder into the folder 'annotations'\n",
+    "for filename in os.listdir(path_to_all_annotation_zip_folder):\n",
+    "    if filename.endswith('.zip'):\n",
+    "        zip_path = os.path.join(path_to_all_annotation_zip_folder, filename)\n",
+    "\n",
+    "        # Open the zip file and extract it into path_to_annotations\n",
+    "        with zipfile.ZipFile(zip_path, 'r') as zip_ref:\n",
+    "            print(zip_path)\n",
+    "            !unzip \"$zip_path\" -d \"$path_to_annotations\"\n",
+    "\n",
+    "        print(f'Extracted: {filename}')\n"
+   ]
+  },
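+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sanity check (a minimal sketch): count the extracted files.\n",
+    "# It only assumes the folder variables defined above and the .edf / .xml\n",
+    "# file extensions used by the SHHS2 export; adjust if your layout differs.\n",
+    "n_edf = len([f for f in os.listdir(path_to_edf_files) if f.lower().endswith('.edf')])\n",
+    "n_xml = len([f for f in os.listdir(path_to_annotations) if f.lower().endswith('.xml')])\n",
+    "print(f'Extracted {n_edf} EDF files and {n_xml} annotation files')"
+   ]
+  },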
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "VdTlPBDxrJYL"
+   },
+   "source": [
+    "# Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "V8AE7mFJjbI3"
+   },
+   "outputs": [],
+   "source": [
+    "import pyedflib\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import xmltodict\n",
+    "import mne\n",
+    "import csv\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "import statistics\n",
+    "import pywt\n",
+    "from scipy.signal import butter, lfilter, resample\n",
+    "from scipy.fft import fft, ifft, fftfreq\n",
+    "from scipy.stats import skew, kurtosis\n",
+    "from hampel import hampel\n",
+    "from collections import Counter\n",
+    "\n",
+    "from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_predict, cross_val_score\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.neighbors import KNeighborsClassifier\n",
+    "from sklearn.preprocessing import LabelEncoder\n",
+    "from sklearn.metrics import classification_report, cohen_kappa_score, precision_score, recall_score, f1_score\n",
+    "from sklearn.datasets import make_classification\n",
+    "\n",
+    "import imblearn\n",
+    "from imblearn.over_sampling import SMOTE\n",
+    "from imblearn.under_sampling import RandomUnderSampler\n",
+    "from imblearn.pipeline import Pipeline\n",
+    "\n",
+    "from tensorflow.keras.models import Sequential\n",
+    "from tensorflow.keras.layers import LSTM, Conv1D, MaxPooling1D, Flatten, Dense\n",
+    "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
+    "from tensorflow.keras.optimizers import Adam\n",
+    "import keras_tuner as kt\n",
+    "from keras_tuner.tuners import RandomSearch\n",
+    "\n",
+    "import traceback\n",
+    "import warnings"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "24EeYQ3DrNrt"
+   },
+   "source": [
+    "# Read available channels"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "HHNMzn3Gcu3N"
+   },
+   "outputs": [],
+   "source": [
+    "for filename in os.listdir(path_to_edf_files):\n",
+    "    data = mne.io.read_raw_edf(path_to_edf_files + \"/\" + filename)\n",
+    "    raw_data = data.get_data()\n",
+    "    channel_names = data.ch_names\n",
+    "    print(channel_names)\n",
+    "    break\n",
+    "\n",
+    "# Only PR, SaO2, POSITION and ABDO RES are used below"
+   ]
+  },
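+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A minimal sketch: confirm the index and sampling frequency of every channel\n",
+    "# with pyedflib, since the extraction below relies on hard-coded channel indices.\n",
+    "# Assumes the first file in edf_files is an EDF recording and that all recordings\n",
+    "# share the same channel layout.\n",
+    "first_edf = os.path.join(path_to_edf_files, sorted(os.listdir(path_to_edf_files))[0])\n",
+    "with pyedflib.EdfReader(first_edf) as f:\n",
+    "    for idx, (label, fs) in enumerate(zip(f.getSignalLabels(), f.getSampleFrequencies())):\n",
+    "        print(idx, label, fs)"
+   ]
+  },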
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "DD5MaGB4rUKv"
+   },
+   "source": [
+    "# Functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "id": "c6rjgpCwrYQK"
+   },
+   "outputs": [],
+   "source": [
+    "def calculate_epochs_and_remainings(total_seconds, epoch_duration):\n",
+    "    # Calculate the number of complete epochs\n",
+    "    epochs_completed = total_seconds // epoch_duration\n",
+    "\n",
+    "    # Calculate the remaining seconds\n",
+    "    remaining_seconds = total_seconds % epoch_duration\n",
+    "\n",
+    "    return epochs_completed, remaining_seconds\n",
+    "\n",
+    "# Visualize signal\n",
+    "def visualize_signal(data, title):\n",
+    "    \"\"\"\n",
+    "    Visualize signal data.\n",
+    "    :param data: signal data as a list or numpy array.\n",
+    "    \"\"\"\n",
+    "    plt.figure(figsize=(12, 6))\n",
+    "    plt.plot(data)\n",
+    "    plt.title(title)\n",
+    "    plt.xlabel(\"Time (in seconds)\")\n",
+    "    plt.ylabel(\"Amplitude\")\n",
+    "    plt.show()\n",
+    "\n",
+    "# Get the expected min and max value of each signal\n",
+    "def getMinMaxValue(signalName):\n",
+    "    if signalName == \"SaO2\":\n",
+    "        return 90, 100\n",
+    "    if signalName == \"PR\":\n",
+    "        return 60, 100\n",
+    "    if signalName == \"POSITION\":\n",
+    "        return 0, 3\n",
+    "    if signalName == \"ABDO RES\":\n",
+    "        return -1, 1\n",
+    "\n",
+    "# Min-max normalization\n",
+    "def normalize(value, min_val, max_val):\n",
+    "    return (value - min_val) / (max_val - min_val)\n",
+    "\n",
+    "# Get time-domain features.\n",
+    "def time_domain_features(signal):\n",
+    "    mean_val = np.mean(signal)\n",
+    "\n",
+    "    # Suppress warnings for this specific computation\n",
+    "    with warnings.catch_warnings():\n",
+    "        warnings.simplefilter(\"ignore\", category=RuntimeWarning)\n",
+    "        kurtosis_val = kurtosis(signal)\n",
+    "        skewness_val = skew(signal)\n",
+    "\n",
+    "    # Replace NaN values with the mean\n",
+    "    if np.isnan(kurtosis_val):\n",
+    "        kurtosis_val = mean_val\n",
+    "    if np.isnan(skewness_val):\n",
+    "        skewness_val = mean_val\n",
+    "\n",
+    "    std_dev = np.std(signal)\n",
+    "    variance = np.var(signal)\n",
+    "\n",
+    "    return mean_val, std_dev, variance, kurtosis_val, skewness_val\n"
+   ]
+  },
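+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A minimal sketch on a synthetic 30-second, 1 Hz 'SaO2-like' epoch (toy data,\n",
+    "# not SHHS) showing what the helpers above return; it mirrors the order used\n",
+    "# below: normalize first, then Hampel-filter, then extract features.\n",
+    "toy = np.random.default_rng(0).normal(loc=95, scale=1, size=30)\n",
+    "toy[10] = 70  # artificial outlier\n",
+    "\n",
+    "toy_norm = normalize(toy, *getMinMaxValue(\"SaO2\"))\n",
+    "toy_filtered = hampel(toy_norm, window_size=5, n_sigma=5.0).filtered_data\n",
+    "print(time_domain_features(toy_filtered))"
+   ]
+  },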
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "opeLCeC6rmlw"
+   },
+   "source": [
+    "# Read and Save Signals"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "NeZ9eyTNzXd2"
+   },
+   "outputs": [],
+   "source": [
+    "# Signals (values are the channel indices in the SHHS2 EDF files)\n",
+    "important_signals = {\n",
+    "    'SaO2': 0,\n",
+    "    'PR': 1,\n",
+    "    'ABDO RES': 10,  # abdominal respiration, sampled at 10 Hz\n",
+    "    'POSITION': 11\n",
+    "}\n",
+    "\n",
+    "# Variables\n",
+    "epoch_duration = 30\n",
+    "\n",
+    "# Create the CSV file and write the header\n",
+    "with open('signal_data.csv', mode='w', newline='') as file:\n",
+    "    writer = csv.writer(file)\n",
+    "    writer.writerow(['stage', 'signalName', 'mean', 'std', 'variance', 'kurtosis', 'skewness'])\n",
+    "\n",
+    "\n",
+    "# Iterate over EDF files in the directory\n",
+    "print(f\"path_to_edf_files {path_to_edf_files}\")\n",
+    "\n",
+    "for filename in os.listdir(path_to_edf_files):\n",
+    "\n",
+    "    print(f\"\\n\\nEDF file {filename}\")\n",
+    "    path_to_edf = f\"{path_to_edf_files}/{filename}\"\n",
+    "\n",
+    "    # Create the XML filename\n",
+    "    filename_without_extension = filename.split(\".\")[0]\n",
+    "    xml_filename = filename_without_extension + \"-nsrr.xml\"\n",
+    "\n",
+    "    with open(f\"{path_to_annotations}/{xml_filename}\") as fd:\n",
+    "        doc = xmltodict.parse(fd.read())\n",
+    "\n",
+    "    # Create scored_events\n",
+    "    annotations = doc['PSGAnnotation']\n",
+    "    events = annotations['ScoredEvents']\n",
+    "    scored_events = events['ScoredEvent']\n",
+    "\n",
+    "    # Get the start time and duration of each sleep stage\n",
+    "    awake_times = []\n",
+    "    lite_sleep_times = []\n",
+    "    deep_sleep_times = []\n",
+    "    rem_sleep_times = []\n",
+    "\n",
+    "    for element in scored_events:\n",
+    "        if element['EventConcept'] == 'Wake|0':\n",
+    "            awake_times.append({\"start\": element[\"Start\"], \"duration\": element[\"Duration\"]})\n",
+    "        if element['EventConcept'] == 'Stage 1 sleep|1' or element['EventConcept'] == 'Stage 2 sleep|2':\n",
+    "            lite_sleep_times.append({\"start\": element[\"Start\"], \"duration\": element[\"Duration\"]})\n",
+    "        if element['EventConcept'] == 'Stage 3 sleep|3':\n",
+    "            deep_sleep_times.append({\"start\": element[\"Start\"], \"duration\": element[\"Duration\"]})\n",
+    "        if element['EventConcept'] == 'REM sleep|5':\n",
+    "            rem_sleep_times.append({\"start\": element[\"Start\"], \"duration\": element[\"Duration\"]})\n",
+    "\n",
+    "    sleep_stages = {\n",
+    "        \"awake\": awake_times,\n",
+    "        \"lite_sleep\": lite_sleep_times,\n",
+    "        \"deep_sleep\": deep_sleep_times,\n",
+    "        \"rem_sleep\": rem_sleep_times\n",
+    "    }\n",
+    "\n",
+    "    # Iterate over each sleep stage and each important signal; for every annotated\n",
+    "    # interval, read the signal, preprocess it and split it into epochs\n",
+    "    try:\n",
+    "\n",
+    "        with pyedflib.EdfReader(path_to_edf) as f:\n",
+    "\n",
+    "            for stage, array_data in sleep_stages.items():\n",
+    "                for signal_name, signal_index in important_signals.items():\n",
+    "\n",
+    "                    # Get the sample frequency\n",
+    "                    sample_frequency = f.getSampleFrequency(signal_index)\n",
+    "\n",
+    "                    for element in array_data:\n",
+    "                        start_value = int(float(element['start']))\n",
+    "                        duration_value = int(float(element['duration']))\n",
+    "\n",
+    "                        # Read the signal; readSignal expects sample offsets, so convert seconds to samples\n",
+    "                        start_sample = int(start_value * sample_frequency)\n",
+    "                        n_samples = int(duration_value * sample_frequency)\n",
+    "                        partial_signal_data = f.readSignal(chn=signal_index, start=start_sample, n=n_samples)\n",
+    "\n",
+    "                        # Preprocess\n",
+    "                        # Downsample to 1 Hz if necessary\n",
+    "                        if sample_frequency != 1:\n",
+    "                            duration = duration_value\n",
+    "                            new_sample_frequency = 1\n",
+    "                            new_length = duration * new_sample_frequency\n",
+    "                            partial_signal_data = resample(partial_signal_data, new_length)\n",
+    "\n",
+    "                        min_val, max_val = getMinMaxValue(signal_name)\n",
+    "                        normalized_signal_data = normalize(partial_signal_data, min_val, max_val)\n",
+    "\n",
+    "                        # Remove outliers from the normalized signal with a Hampel filter\n",
+    "                        result = hampel(normalized_signal_data, window_size=5, n_sigma=5.0)\n",
+    "                        filtered_signal_data = result.filtered_data\n",
+    "\n",
+    "                        # Split the signal into 30-second epochs\n",
+    "                        epochs, remainings = calculate_epochs_and_remainings(duration_value, epoch_duration)\n",
+    "\n",
+    "                        # Read the signal in 30-second epochs\n",
+    "                        for i in range(epochs):\n",
+    "                            start_index = i * epoch_duration\n",
+    "                            end_index = start_index + epoch_duration\n",
+    "\n",
+    "                            signal_data = filtered_signal_data[start_index:end_index]\n",
+    "\n",
+    "                            # Feature extraction\n",
+    "                            mean_val, std_dev_val, variance_val, kurtosis_val, skewness_val = time_domain_features(signal_data)\n",
+    "\n",
+    "                            # Write the data to the CSV file (same column order as the header above)\n",
+    "                            with open('signal_data.csv', mode='a', newline='') as file:\n",
+    "                                writer = csv.writer(file)\n",
+    "                                writer.writerow([stage, signal_name, mean_val, std_dev_val, variance_val, kurtosis_val, skewness_val])\n",
+    "\n",
+    "    except Exception as e:\n",
+    "        print(f\"Error {filename} {e}\")\n",
+    "        traceback.print_exc()\n",
+    "        continue\n",
+    "\n",
+    "print(\"Finished\")"
+   ]
+  },
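+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Quick check (a minimal sketch): read the per-epoch feature rows back in and\n",
+    "# confirm the column alignment and the number of epochs per stage and signal.\n",
+    "# Assumes signal_data.csv was just written by the cell above.\n",
+    "check = pd.read_csv('signal_data.csv')\n",
+    "print(check.head())\n",
+    "print(check.groupby(['stage', 'signalName']).size())"
+   ]
+  },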
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "WI1XPMn4sBoj"
+   },
+   "source": [
+    "# Preprocess"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "--ulGYtFD_03"
+   },
+   "outputs": [],
+   "source": [
+    "data = pd.read_csv('signal_data.csv')\n",
+    "data = data.dropna()\n",
+    "\n",
+    "data = data[data['signalName'] != 'POSITION']  # Remove rows with signalName POSITION\n",
+    "data = data[data['signalName'] != 'ABDO RES']  # Remove rows with signalName ABDO RES\n",
+    "\n",
+    "# Write the preprocessed data to a new CSV file\n",
+    "#data.to_csv('preprocessed_signal_data.csv', index=False)\n",
+    "\n",
+    "data_without_stage = data.drop('stage', axis=1)\n",
+    "amount_trained_features = len(list(data_without_stage.columns))\n",
+    "print(f\"Unique stages:\\n{data['stage'].unique()}\")\n",
+    "\n",
+    "# Visualize the number of entries per stage\n",
+    "class_counts = data['stage'].value_counts()\n",
+    "class_counts.plot(kind='bar')\n",
+    "plt.xlabel('Class')\n",
+    "plt.ylabel('Amount of entries')\n",
+    "plt.title('Original Class Distribution')\n",
+    "plt.show()\n",
+    "\n",
+    "# Calculate the class imbalance factor (CIF) to check for an imbalanced dataset\n",
+    "total_entries = len(data)\n",
+    "min_stage_entries = data['stage'].value_counts().min()\n",
+    "cif = (total_entries / (2 * 4 * min_stage_entries))  # 4 sleep-stage classes\n",
+    "print(f\"CIF {cif}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "SH_cDL9oqMcf"
+   },
+   "outputs": [],
+   "source": [
+    "X = data[['signalName', 'std', 'mean', 'variance', 'kurtosis', 'skewness']].copy()\n",
+    "\n",
+    "signal_name_encoder = LabelEncoder()\n",
+    "stage_encoder = LabelEncoder()\n",
+    "\n",
+    "X['signalName'] = signal_name_encoder.fit_transform(X['signalName'])\n",
+    "data['stage'] = stage_encoder.fit_transform(data['stage'])\n",
+    "\n",
+    "y = data['stage']\n",
+    "print(\"Unique values in y after encoding:\", np.unique(y))\n",
+    "\n",
+    "\n",
+    "over = SMOTE(sampling_strategy=\"not majority\")  # Oversample every class except the majority so the class counts are equalized\n",
+    "under = RandomUnderSampler(sampling_strategy='not minority')  # Undersample every class except the minority\n",
+    "steps = [('o', over), ('u', under)]\n",
+    "pipeline = Pipeline(steps=steps)\n",
+    "X, y = over.fit_resample(X, y)  # Only oversampling is used here; the combined pipeline above is kept for reference\n",
+    "counter = Counter(y)\n",
+    "print(counter)\n",
+    "\n",
+    "\n",
+    "# Plotting the class distribution\n",
+    "plt.bar(counter.keys(), counter.values())\n",
+    "plt.xlabel('Class')\n",
+    "plt.ylabel('Amount of entries')\n",
+    "plt.title('Resampled Class Distribution')\n",
+    "plt.show()\n",
+    "\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "MmnmsE1tsOzk"
+   },
+   "source": [
+    "# Machine Learning"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "nlBhIDBO0R27"
+   },
+   "outputs": [],
+   "source": [
+    "# Random Forest\n",
+    "\n",
+    "# Hyperparameter search (commented out; the best parameters from earlier runs are noted below)\n",
+    "'''param_distributions = {\n",
+    "    'n_estimators': [50, 100],\n",
+    "    'max_depth': [10, 20],\n",
+    "    'min_samples_split': [10],\n",
+    "    'min_samples_leaf': [6],\n",
+    "    'bootstrap': [True]\n",
+    "}\n",
+    "\n",
+    "# RandomizedSearchCV for RF\n",
+    "# Best parameters: {'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 6, 'max_depth': 20, 'bootstrap': True}\n",
+    "# Best parameters: {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_depth': 20, 'bootstrap': True}\n",
+    "best_params = {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_depth': 20, 'bootstrap': True}\n",
+    "\n",
+    "random_search = RandomizedSearchCV(\n",
+    "    estimator=RandomForestClassifier(random_state=42, class_weight='balanced'),\n",
+    "    param_distributions=param_distributions,\n",
+    "    n_iter=5,\n",
+    "    cv=5,\n",
+    "    verbose=2,\n",
+    "    random_state=42,\n",
+    "    n_jobs=-1\n",
+    ")\n",
+    "random_search.fit(X_train, y_train)\n",
+    "best_params = random_search.best_params_\n",
+    "best_score = random_search.best_score_\n",
+    "print(f\"Best parameters: {best_params}\")\n",
+    "print(f\"Best cross-validated score: {best_score}\")\n",
+    "\n",
+    "# Train\n",
+    "#rf_classifier = RandomForestClassifier(**best_params, random_state=42, class_weight='balanced')  # With hyperparameter tuning'''\n",
+    "\n",
+    "# Without hyperparameter tuning\n",
+    "rf_classifier = RandomForestClassifier(random_state=42)\n",
+    "rf_classifier.fit(X_train, y_train)\n",
+    "\n",
+    "# Extract the feature importances\n",
+    "feature_importances = rf_classifier.feature_importances_\n",
+    "\n",
+    "features_df = pd.DataFrame({\n",
+    "    'Feature': X.columns,\n",
+    "    'Importance': feature_importances\n",
+    "}).sort_values(by='Importance', ascending=False)\n",
+    "print(features_df)\n",
+    "\n",
+    "\n",
+    "# Make predictions on the test set\n",
+    "y_pred = rf_classifier.predict(X_test)\n",
+    "\n",
+    "# Evaluation\n",
+    "print(\"Classification report for RandomForestClassifier\")\n",
+    "print(classification_report(y_test, y_pred))\n",
+    "macro_f1 = f1_score(y_test, y_pred, average='macro')\n",
+    "print(f\"Macro-average F1 Score: {macro_f1}\")\n",
+    "kappa = cohen_kappa_score(y_test, y_pred)\n",
+    "print(f\"Cohen's Kappa: {kappa}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "YGWlxbazIfMU"
+   },
+   "outputs": [],
+   "source": [
+    "# KNN\n",
+    "\n",
+    "# Hyperparameters for KNN\n",
+    "param_knn = {\n",
+    "    'n_neighbors': [3, 5, 7, 10, 15],\n",
+    "    'weights': ['uniform', 'distance'],\n",
+    "    'metric': ['minkowski', 'euclidean', 'manhattan']\n",
+    "}\n",
+    "\n",
+    "# RandomizedSearchCV for KNN\n",
+    "knn_search = RandomizedSearchCV(\n",
+    "    estimator=KNeighborsClassifier(),\n",
+    "    param_distributions=param_knn,\n",
+    "    n_iter=5,\n",
+    "    cv=5,\n",
+    "    verbose=2,\n",
+    "    random_state=42,\n",
+    "    n_jobs=-1\n",
+    ")\n",
+    "\n",
+    "# Fit to the training data\n",
+    "knn_search.fit(X_train, y_train)\n",
+    "\n",
+    "# Retrieve the best parameters and score for KNN\n",
+    "best_params_knn = knn_search.best_params_\n",
+    "best_score_knn = knn_search.best_score_\n",
+    "\n",
+    "# Output the results for KNN\n",
+    "print(f\"Best parameters for KNN: {best_params_knn}\")\n",
+    "print(f\"Best cross-validated score for KNN: {best_score_knn}\")\n",
+    "\n",
+    "# Train KNN with the best parameters\n",
+    "knn_classifier = KNeighborsClassifier(**best_params_knn)\n",
+    "\n",
+    "# Without hyperparameter tuning\n",
+    "#knn_classifier = KNeighborsClassifier()\n",
+    "knn_classifier.fit(X_train, y_train)\n",
+    "\n",
+    "# Feature importances from the Random Forest above, for reference\n",
+    "# (KNN itself does not provide feature importances)\n",
+    "feature_importances = rf_classifier.feature_importances_\n",
+    "features_df = pd.DataFrame({\n",
+    "    'Feature': X.columns,\n",
+    "    'Importance': feature_importances\n",
+    "}).sort_values(by='Importance', ascending=False)\n",
+    "print(features_df)\n",
+    "\n",
+    "\n",
+    "# Predict and evaluate KNN\n",
+    "y_pred_knn = knn_classifier.predict(X_test)\n",
+    "print(\"Classification report for KNeighborsClassifier:\")\n",
+    "print(classification_report(y_test, y_pred_knn))\n",
+    "\n",
+    "macro_f1 = f1_score(y_test, y_pred_knn, average='macro')\n",
+    "print(f\"Macro-average F1 Score: {macro_f1}\")\n",
+    "\n",
+    "kappa_knn = cohen_kappa_score(y_test, y_pred_knn)\n",
+    "print(f\"Cohen's Kappa for KNN: {kappa_knn}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "gTAMUQw8sWIl"
+   },
+   "source": [
+    "# Deep Learning"
+   ]
+  },
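+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The Keras models below (Conv1D, LSTM) expect 3-D input of shape\n",
+    "# (samples, timesteps, channels). A minimal sketch of the reshape that each\n",
+    "# model cell applies: every epoch's feature vector is treated as a short\n",
+    "# sequence with a single channel.\n",
+    "print('2-D feature matrix:', np.asarray(X_train).shape)\n",
+    "print('3-D model input:   ', np.expand_dims(np.asarray(X_train, dtype='float32'), -1).shape)"
+   ]
+  },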
"cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vgxUAedeRIkl" + }, + "outputs": [], + "source": [ + "# CNN\n", + "\n", + "print(\"Unique labels in training set:\", np.unique(y_train))\n", + "print(\"Unique labels in test set:\", np.unique(y_test))\n", + "\n", + "\n", + "def build_model(hp):\n", + " model = Sequential()\n", + " hp_filters = hp.Int('filters', min_value=32, max_value=128, step=32)\n", + " hp_choice = hp.Choice('kernel_size', values=[3, 5])\n", + " model.add(Conv1D(filters=hp_filters,\n", + " kernel_size=hp_choice,\n", + " activation='relu',\n", + " input_shape=(amount_trained_features, 1)))\n", + " model.add(MaxPooling1D(2))\n", + " model.add(Flatten())\n", + " hp_unit_filter = hp.Int('units', min_value=64, max_value=128, step=32)\n", + " model.add(Dense(units=hp_unit_filter, activation='relu'))\n", + " model.add(Dense(len(np.unique(y)), activation='softmax'))\n", + "\n", + " hp_learning_rate = hp.Choice('learning_rate', values = [1e-2, 1e-3])\n", + " opt = Adam(learning_rate=hp_learning_rate)\n", + "\n", + " model.compile(optimizer=opt,loss='sparse_categorical_crossentropy',\n", + " metrics=['accuracy'])\n", + " return model\n", + "\n", + "tuner = RandomSearch(\n", + " hypermodel=build_model,\n", + " objective='val_accuracy',\n", + " max_trials=4,\n", + " executions_per_trial=1\n", + ")\n", + "\n", + "tuner.search_space_summary()\n", + "\n", + "# Start the search and get the best model\n", + "tuner.search(X_train, y_train, epochs=10, validation_split=0.2, batch_size=62)\n", + "best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]\n", + "\n", + "# Build the model with the best hyperparameters\n", + "model_cnn = build_model(best_hps)\n", + "model_cnn.fit(X_train, y_train, epochs=10, batch_size=62)\n", + "\n", + "# Make predictions\n", + "y_pred_probs = model_cnn.predict(X_test)\n", + "y_pred_classes = np.argmax(y_pred_probs, axis=1)\n", + "\n", + "test_loss, test_acc = model_cnn.evaluate(X_test, y_test, verbose=2)\n", + "\n", + "# Evaluation\n", + "kappa = cohen_kappa_score(y_test, y_pred_classes)\n", + "print(f\"Cohen's Kappa: {kappa}\")\n", + "\n", + "y_pred_labels = np.argmax(y_pred_probs, axis=1)\n", + "recall = recall_score(y_test, y_pred_labels, average='weighted')\n", + "print('Recall: %f' % recall)\n", + "\n", + "f1 = f1_score(y_test, y_pred_labels, average='weighted')\n", + "print('F1 score: %f' % f1)\n", + "\n", + "target_names = [str(name) for name in stage_encoder.inverse_transform([i for i in range(len(stage_encoder.classes_))])]\n", + "\n", + "print(classification_report(y_test, y_pred_classes, target_names=target_names))\n", + "\n", + "macro_f1 = f1_score(y_test, y_pred_classes, average='macro')\n", + "print(f\"Macro-average F1 Score: {macro_f1}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "v6LAuRFgW5Kh" + }, + "outputs": [], + "source": [ + "# LSTM\n", + "\n", + "def build_model(hp):\n", + " model = Sequential([\n", + " LSTM(hp.Int('units', min_value=32, max_value=128, step=32),\n", + " activation='relu',\n", + " input_shape=(amount_trained_features, 1)),\n", + " Dense(len(np.unique(y)), activation='softmax')\n", + " ])\n", + "\n", + " model.compile(optimizer=Adam(hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),\n", + " loss='sparse_categorical_crossentropy',\n", + " metrics=['accuracy'])\n", + " return model\n", + "\n", + "tuner = RandomSearch(\n", + " hypermodel=build_model,\n", + " objective='val_accuracy',\n", + " max_trials=4,\n", + " 
+    "    executions_per_trial=1,\n",
+    "    project_name='lstm_tuning',  # separate directory so the CNN and LSTM searches do not collide\n",
+    "    overwrite=True\n",
+    ")\n",
+    "\n",
+    "tuner.search(X_train_dl, y_train, epochs=10, validation_split=0.2, batch_size=62)\n",
+    "\n",
+    "# Get the best hyperparameters\n",
+    "best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]\n",
+    "print(f\"Best hyperparameters: {best_hps.values}\")\n",
+    "\n",
+    "# Build the model with the best hyperparameters and train it on the data\n",
+    "model = tuner.hypermodel.build(best_hps)\n",
+    "history = model.fit(X_train_dl, y_train, epochs=20, batch_size=64, validation_split=0.2)\n",
+    "\n",
+    "# Feature importances from the Random Forest above, for reference\n",
+    "# (the LSTM itself does not provide feature importances)\n",
+    "feature_importances = rf_classifier.feature_importances_\n",
+    "features_df = pd.DataFrame({\n",
+    "    'Feature': X.columns,\n",
+    "    'Importance': feature_importances\n",
+    "}).sort_values(by='Importance', ascending=False)\n",
+    "print(features_df)\n",
+    "\n",
+    "\n",
+    "y_pred_probs = model.predict(X_test_dl)\n",
+    "y_pred_classes = np.argmax(y_pred_probs, axis=1)\n",
+    "\n",
+    "test_loss, test_acc = model.evaluate(X_test_dl, y_test, verbose=2)\n",
+    "\n",
+    "# Evaluation\n",
+    "kappa = cohen_kappa_score(y_test, y_pred_classes)\n",
+    "print(f\"Cohen's Kappa: {kappa}\")\n",
+    "\n",
+    "recall = recall_score(y_test, y_pred_classes, average='weighted')\n",
+    "print('Recall: %f' % recall)\n",
+    "\n",
+    "f1 = f1_score(y_test, y_pred_classes, average='weighted')\n",
+    "print('F1 score: %f' % f1)\n",
+    "\n",
+    "target_names = [str(name) for name in stage_encoder.inverse_transform([i for i in range(len(stage_encoder.classes_))])]\n",
+    "\n",
+    "print(classification_report(y_test, y_pred_classes, target_names=target_names))\n",
+    "macro_f1 = f1_score(y_test, y_pred_classes, average='macro')\n",
+    "print(f\"Macro-average F1 Score: {macro_f1}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file