diff --git a/SleepStageModels.ipynb b/SleepStageModels.ipynb
deleted file mode 100644
index 5fea506e21579de37e9781038a1a57f3f61dd16d..0000000000000000000000000000000000000000
--- a/SleepStageModels.ipynb
+++ /dev/null
@@ -1,973 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "L300ySZBqbyH"
-   },
-   "source": [
-    "# Installation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "YSztofTQiTt7"
-   },
-   "outputs": [],
-   "source": [
-    "!pip install pyedflib\n",
-    "!pip install numpy\n",
-    "!pip install xmltodict\n",
-    "!pip install mne\n",
-    "!pip install tensorflow\n",
-    "!pip install pandas\n",
-    "!pip install scikit-learn\n",
-    "!pip install hampel\n",
-    "!pip install keras-tuner"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "D8J3VziBqm-p"
-   },
-   "source": [
-    "# Prepare data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "2X2HY602mIwc",
-    "outputId": "4fb06705-6620-49d1-c0d2-fbedb5176c58"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Mounted at /content/drive\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Mount Google Drive\n",
-    "\n",
-    "from google.colab import drive\n",
-    "drive.mount('/content/drive')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "n4FwOJ0Dm7ro"
-   },
-   "outputs": [],
-   "source": [
-    "# Create folders to store the data from Google Drive\n",
-    "import os\n",
-    "\n",
-    "path_to_edf_files = 'edf_files'\n",
-    "if not os.path.exists(path_to_edf_files):\n",
-    "    os.mkdir(path_to_edf_files)\n",
-    "\n",
-    "path_to_annotations = 'annotations'  # '/content/drive/My Drive/annotations'\n",
-    "if not os.path.exists(path_to_annotations):\n",
-    "    os.mkdir(path_to_annotations)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "v3vZ-kppi4DA"
-   },
-   "outputs": [],
-   "source": [
-    "import shutil\n",
-    "# Be careful not to delete a folder by accident (always comment this out after running it)\n",
-    "#shutil.rmtree(path_to_annotations)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "hKFNmDrEmNpy"
-   },
-   "outputs": [],
-   "source": [
-    "import zipfile\n",
-    "\n",
-    "# Info: all EDF and annotation files are stored in multiple zip files on Google Drive.\n",
-    "#   Root folder for EDF files is path_to_all_edf_zip_folder\n",
-    "#   Root folder for annotation files is path_to_all_annotation_zip_folder\n",
-    "\n",
-    "path_to_all_edf_zip_folder = '/content/drive/My Drive/shhs2_edf_zip'\n",
-    "path_to_all_annotation_zip_folder = '/content/drive/My Drive/shhs2_annotation_zip'\n",
-    "\n",
-    "\n",
-    "# Unzip all files from path_to_all_edf_zip_folder into the folder 'edf_files'\n",
-    "for filename in os.listdir(path_to_all_edf_zip_folder):\n",
-    "    if filename.endswith('.zip'):\n",
-    "        zip_path = os.path.join(path_to_all_edf_zip_folder, filename)\n",
-    "\n",
-    "        # Open the ZIP file\n",
-    "        with zipfile.ZipFile(zip_path, 'r') as zip_ref:\n",
-    "            # Extract files into path_to_edf_files\n",
-    "            print(zip_path)\n",
-    "            !unzip \"$zip_path\" -d \"$path_to_edf_files\"\n",
-    "\n",
-    "        print(f'Extracted: {filename}')\n",
-    "\n",
-    "\n",
-    "# Unzip all files from path_to_all_annotation_zip_folder into the folder 'annotations'\n",
-    "for filename in os.listdir(path_to_all_annotation_zip_folder):\n",
-    "    if filename.endswith('.zip'):\n",
-    "        zip_path = os.path.join(path_to_all_annotation_zip_folder, filename)\n",
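-    "\n",
-    "        # Note: the ZipFile handle opened below could also extract these files\n",
-    "        # directly via zip_ref.extractall(path_to_annotations) instead of\n",
-    "        # shelling out to unzip; both approaches yield the same files.\n",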
- "\n", - " # Open ZIP-File\n", - " with zipfile.ZipFile(zip_path, 'r') as zip_ref:\n", - " # # extract files into path_to_annotations\n", - " print(zip_path)\n", - " !unzip \"$zip_path\" -d \"$path_to_annotations\"\n", - "\n", - " print(f'Extrahiert: {filename}')\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VdTlPBDxrJYL" - }, - "source": [ - "# Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "V8AE7mFJjbI3" - }, - "outputs": [], - "source": [ - "import pyedflib\n", - "import numpy as np\n", - "import os\n", - "import xmltodict\n", - "import mne\n", - "import csv\n", - "import matplotlib.pyplot as plt\n", - "import pandas as pd\n", - "from scipy.signal import butter, lfilter, resample\n", - "from hampel import hampel\n", - "import numpy\n", - "import statistics\n", - "import pywt\n", - "from scipy.fft import fft, ifft, fftfreq\n", - "\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.neighbors import KNeighborsClassifier\n", - "from sklearn.metrics import classification_report, cohen_kappa_score\n", - "from sklearn.preprocessing import LabelEncoder\n", - "import imblearn\n", - "from collections import Counter\n", - "from sklearn.datasets import make_classification\n", - "from matplotlib import pyplot\n", - "from numpy import where\n", - "from imblearn.over_sampling import SMOTE\n", - "from imblearn.under_sampling import RandomUnderSampler\n", - "from imblearn.pipeline import Pipeline\n", - "from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_predict, cross_val_score\n", - "\n", - "from scipy.stats import skew, kurtosis\n", - "from tensorflow.keras.models import Sequential\n", - "from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense\n", - "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.metrics import classification_report, cohen_kappa_score\n", - "from sklearn.preprocessing import LabelEncoder\n", - "\n", - "from sklearn.metrics import precision_score\n", - "from sklearn.metrics import recall_score\n", - "from sklearn.metrics import f1_score\n", - "\n", - "from sklearn.metrics import cohen_kappa_score, recall_score, f1_score, classification_report\n", - "from tensorflow.keras.optimizers import Adam\n", - "import kerastuner as kt\n", - "from kerastuner.tuners import RandomSearch\n", - "\n", - "import traceback\n", - "import warnings" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "24EeYQ3DrNrt" - }, - "source": [ - "# Read available channels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "HHNMzn3Gcu3N", - "outputId": "0e3c6aa2-f5bd-4507-c9da-cb460d6cec06" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Extracting EDF parameters from /content/edf_files/shhs2-200646.edf...\n", - "EDF file detected\n", - "Setting channel info structure...\n", - "Creating raw.info structure...\n", - "['SaO2', 'H.R.', 'EEG(sec)', 'ECG', 'EMG', 'EOG(L)', 'EOG(R)', 'EEG', 'AIRFLOW', 'THOR RES', 'ABDO RES', 'POSITION', 'LIGHT', 'OX stat']\n" - ] - } - ], - "source": [ - "for filename in os.listdir(path_to_edf_files):\n", - " data = mne.io.read_raw_edf(path_to_edf_files + \"/\" + filename)\n", - " raw_data = data.get_data()\n", - " channel_names = 
data.ch_names\n", - " print(channel_names)\n", - " break\n", - "\n", - "# I only use PR, SaO2, Position and ABDO RES" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DD5MaGB4rUKv" - }, - "source": [ - "# Functions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "c6rjgpCwrYQK" - }, - "outputs": [], - "source": [ - "def calculate_epochs_and_remainings(total_seconds, epoch_duration):\n", - " # Calculate the number of complete epochs\n", - " epochs_completed = total_seconds // epoch_duration\n", - "\n", - " # Calculate the remaining seconds\n", - " remaining_seconds = total_seconds % epoch_duration\n", - "\n", - " return epochs_completed, remaining_seconds\n", - "\n", - "# Visualize signal\n", - "def visualize_signal(data, title):\n", - " \"\"\"\n", - " Visualize EEG signal data.\n", - " :param data: EEG signal data as a list or numpy array.\n", - " \"\"\"\n", - " plt.figure(figsize=(12, 6))\n", - " plt.plot(data)\n", - " plt.title(title)\n", - " plt.xlabel(\"Time (in seconds)\")\n", - " plt.ylabel(\"Amplitude\")\n", - " plt.show()\n", - "\n", - "# Get the Min and Max Value of the singals\n", - "def getMinMaxValue(signalName):\n", - " if signalName == \"SaO2\":\n", - " return 90, 100\n", - " if signalName == \"PR\":\n", - " return 60, 100\n", - " if signalName == \"POSITION\":\n", - " return 0, 3\n", - " if signalName == \"ABDO RES\":\n", - " return -1, 1\n", - "\n", - "# Use min max normalization\n", - "def normalize(value, min_val, max_val):\n", - " return (value - min_val) / (max_val - min_val)\n", - "\n", - "# Get time domain features.\n", - "def time_domain_features(signal):\n", - " mean_val = np.mean(signal)\n", - "\n", - " # Suppress warnings for this specific computation\n", - " with warnings.catch_warnings():\n", - " warnings.simplefilter(\"ignore\", category=RuntimeWarning)\n", - " kurtosis_val = kurtosis(signal)\n", - " skewness_val = skew(signal)\n", - "\n", - " # Replace Nan Values with mean\n", - " if np.isnan(kurtosis_val):\n", - " kurtosis_val = mean_val\n", - " if np.isnan(skewness_val):\n", - " skewness_val = mean_val\n", - "\n", - " std_dev = np.std(signal)\n", - " variance = np.var(signal)\n", - "\n", - " return mean_val, std_dev, variance, kurtosis_val, skewness_val\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "opeLCeC6rmlw" - }, - "source": [ - "# Read and Save Signals" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NeZ9eyTNzXd2" - }, - "outputs": [], - "source": [ - "# Signals\n", - "important_signals = {\n", - " 'SaO2' : 0,\n", - " 'PR': 1,\n", - " 'ABDO RES': 10, # Abdomen has Sampling Rate 10\n", - " 'POSITION': 11\n", - "}\n", - "\n", - "# Variables\n", - "epoch_duration = 30\n", - "\n", - "# Create and open the CSV file for writing the header\n", - "with open('signal_data.csv', mode='w', newline='') as file:\n", - " writer = csv.writer(file)\n", - " writer.writerow(['stage', 'signalName', 'std', 'mean', 'variance', 'kurtosis', 'skewness'])\n", - "\n", - "\n", - "# Iterate over EDF files in the directory\n", - "print(f\"path_to_edf_files {path_to_edf_files}\")\n", - "\n", - "for filename in os.listdir(path_to_edf_files):\n", - "\n", - " print(f\"\\n\\nEDF-File {filename}\")\n", - " path_to_edf = f\"{path_to_edf_files}/{filename}\"\n", - "\n", - " # Create xml filename\n", - " filename_without_extension = filename.split(\".\")[0]\n", - " xml_filename = filename_without_extension + \"-nsrr.xml\"\n", - "\n", - " with 
open(f\"{path_to_annotations}/{xml_filename}\") as fd:\n", - " doc = xmltodict.parse(fd.read())\n", - "\n", - " # Create scored_events\n", - " annotations = doc['PSGAnnotation']\n", - " events = annotations['ScoredEvents']\n", - " scored_events = events['ScoredEvent']\n", - "\n", - " # Get the start time and duration of each sleep stage\n", - " awake_times = []\n", - " lite_sleep_times = []\n", - " deep_sleep_times = []\n", - " rem_sleep_times = []\n", - "\n", - " for element in scored_events:\n", - " if element['EventConcept'] == 'Wake|0':\n", - " awake_times.append({\"start\": element[\"Start\"], \"duration\": element[\"Duration\"]})\n", - " if element['EventConcept'] == 'Stage 1 sleep|1' or element['EventConcept'] == 'Stage 2 sleep|2':\n", - " lite_sleep_times.append({\"start\": element[\"Start\"], \"duration\": element[\"Duration\"]})\n", - " if element['EventConcept'] == 'Stage 3 sleep|3':\n", - " deep_sleep_times.append({\"start\": element[\"Start\"], \"duration\": element[\"Duration\"]})\n", - " if element['EventConcept'] == 'REM sleep|5':\n", - " rem_sleep_times.append({\"start\": element[\"Start\"], \"duration\": element[\"Duration\"]})\n", - "\n", - " sleep_stages = {\n", - " \"awake\": awake_times,\n", - " \"lite_sleep\": lite_sleep_times,\n", - " \"deep_sleep\": deep_sleep_times,\n", - " \"rem_sleep\": rem_sleep_times\n", - " }\n", - "\n", - " # Iterate over each EDF File\n", - " # Iterate over each Sleep Stage\n", - " # Iterate over each important Signal\n", - " # For each Sleep Stage get start and duration\n", - " try:\n", - "\n", - " with pyedflib.EdfReader(path_to_edf) as f:\n", - "\n", - " # Read the whole Signal and store in seperate csv File. This is explained in the bachelor Thesis.\n", - " whole_signal = f.readSignal(chn=signal_index)\n", - "\n", - " for stage, array_data in sleep_stages.items():\n", - " for signal_name, signal_index in important_signals.items():\n", - "\n", - " # get sample frequency\n", - " sample_frequency = f.getSampleFrequency(signal_index)\n", - "\n", - " for element in array_data:\n", - " start_value = int(float(element['start']))\n", - " duration_value = int(float(element['duration']))\n", - "\n", - " # Read the Signal\n", - " partial_signal_data = f.readSignal(chn=signal_index, start=start_value, n=duration_value)\n", - "\n", - " # Preprocess\n", - " # Downsample if necessary\n", - " if sample_frequency != 1:\n", - " duration = duration_value\n", - " new_sample_frequency = 1\n", - " new_length = duration * new_sample_frequency\n", - " partial_signal_data = resample(partial_signal_data, new_length)\n", - "\n", - " min_val, max_val = getMinMaxValue(signal_name)\n", - " filtered_signal_data = normalize(filtered_signal_data, min_val, max_val)\n", - "\n", - " # remove outliner with Hampel-Filter\n", - " result = hampel(partial_signal_data, window_size=5, n_sigma=5.0)\n", - " filtered_signal_data = result.filtered_data\n", - "\n", - " # Split Signal in 30 sec epochs\n", - " epochs, remainings = calculate_epochs_and_remainings(duration_value, epoch_duration)\n", - "\n", - " # Read signal in 30 sec epochs\n", - " for i in range(epochs):\n", - " start_index = i * epoch_duration\n", - " end_index = start_index + epoch_duration\n", - "\n", - " signal_data = filtered_signal_data[start_index:end_index]\n", - "\n", - " # Feature extraction\n", - " mean_val, std_dev_val, variance_val, kurtosis_val, skewness_val = time_domain_features(signal_data)\n", - "\n", - " # Write the data to the CSV file\n", - " with open('signal_data.csv', mode='a', newline='') as 
file:\n", - " writer = csv.writer(file)\n", - " writer.writerow([stage, signal_name, mean_val, std_dev_val, variance_val, kurtosis_val, skewness_val])\n", - "\n", - " except Exception as e:\n", - " print(f\"Error {filename} {e}\")\n", - " traceback.print_exc()\n", - " continue\n", - "\n", - "print(\"Finished\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WI1XPMn4sBoj" - }, - "source": [ - "# Preprocess" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "--ulGYtFD_03" - }, - "outputs": [], - "source": [ - "data = pd.read_csv('signal_data.csv')\n", - "data = data.dropna()\n", - "\n", - "data = data[data['signalName'] != 'POSITION'] # Remove rows with signalName Position\n", - "data = data[data['signalName'] != 'ABDO RES'] # Remove rows with signalName ABDO RES\n", - "\n", - "# Write the preprocessed data to a new CSV file\n", - "data.to_csv('preprocessed_signal_data.csv', index=False)\n", - "\n", - "data_without_stage = data.drop('stage', axis=1)\n", - "amount_trained_features = len(list(data_without_stage.columns))\n", - "print(f\"Amt Stages \\n{data['stage'].unique()}\")\n", - "\n", - "# Visualize the quantity of each stage\n", - "class_counts = data['stage'].value_counts()\n", - "class_counts.plot(kind='bar')\n", - "plt.xlabel('Class')\n", - "plt.ylabel('Amount of entries')\n", - "plt.title('Original Class Distribution')\n", - "plt.show()\n", - "\n", - "# Calculate CIF to check for imbalance dataset\n", - "total_entries = len(data)\n", - "min_stage_entries = data['stage'].value_counts().min()\n", - "cif = (total_entries / (2 * 4 * min_stage_entries))\n", - "print(f\"CIF {cif}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "SH_cDL9oqMcf" - }, - "outputs": [], - "source": [ - "X = data[['signalName', 'std', 'mean', 'variance', 'kurtosis', 'skewness']]\n", - "\n", - "signal_name_encoder = LabelEncoder()\n", - "stage_encoder = LabelEncoder()\n", - "\n", - "X['signalName'] = signal_name_encoder.fit_transform(X['signalName'])\n", - "data['stage'] = stage_encoder.fit_transform(data['stage'])\n", - "\n", - "y = data['stage']\n", - "print(\"Unique values in y after encoding:\", np.unique(y))\n", - "\n", - "\n", - "over = SMOTE(sampling_strategy=\"not majority\") # Oversample only the minority class / The number of samples in the different classes will be equalized\n", - "under = RandomUnderSampler(sampling_strategy='not minority') # Undersample only the majority class\n", - "steps = [('o', over), ('u', under)]\n", - "pipeline = Pipeline(steps=steps)\n", - "X, y = over.fit_resample(X, y) # ONLY USE OVERSMPLE\n", - "counter = Counter(y)\n", - "print(counter)\n", - "\n", - "\n", - "# Plotting the class distribution\n", - "plt.bar(counter.keys(), counter.values())\n", - "plt.xlabel('Class')\n", - "plt.ylabel('Amount of entries')\n", - "plt.title('Resampled Class Distribution')\n", - "plt.show()\n", - "\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MmnmsE1tsOzk" - }, - "source": [ - "# Machine Learning" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nlBhIDBO0R27" - }, - "outputs": [], - "source": [ - "# Random Forest\n", - "\n", - "# Hyperparameter\n", - "'''param_distributions = {\n", - " 'n_estimators': [50, 100],\n", - " 'max_depth': [10, 20],\n", - " 'min_samples_split': [10],\n", - " 'min_samples_leaf': [6],\n", - " 'bootstrap': [True]\n", - 
"}\n", - "\n", - "# RandomizedSearchCV for RF\n", - "#Best parameters: {'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 6, 'max_depth': 20, 'bootstrap': True}\n", - "#Best parameters: {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_depth': 20, 'bootstrap': True}\n", - "best_params = {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 6, 'max_depth': 20, 'bootstrap': True}\n", - "\n", - "random_search = RandomizedSearchCV(\n", - " estimator=RandomForestClassifier(random_state=42, class_weight='balanced'),\n", - " param_distributions=param_distributions,\n", - " n_iter=5,\n", - " cv=5,\n", - " verbose=2,\n", - " random_state=42,\n", - " n_jobs=-1\n", - ")\n", - "random_search.fit(X_train, y_train)\n", - "best_params = random_search.best_params_\n", - "best_score = random_search.best_score_\n", - "print(f\"Best parameters: {best_params}\")\n", - "print(f\"Best cross-validated score: {best_score}\")\n", - "\n", - "# Train\n", - "#rf_classifier = RandomForestClassifier(**best_params, random_state=42, class_weight='balanced') # With Hypertuning'''\n", - "\n", - "# Without Hypertuning\n", - "rf_classifier = RandomForestClassifier(random_state=42)\n", - "rf_classifier.fit(X_train, y_train)\n", - "\n", - "# Extract Feature Importance\n", - "feature_importances = rf_classifier.feature_importances_\n", - "\n", - "features_df = pd.DataFrame({\n", - " 'Feature': X.columns,\n", - " 'Importance': feature_importances\n", - "}).sort_values(by='Importance', ascending=False)\n", - "print(features_df)\n", - "\n", - "\n", - "# Make predictions on the test set\n", - "y_pred = rf_classifier.predict(X_test)\n", - "\n", - "# Evaluation\n", - "print(\"Classification report for RandomForestClassifier\")\n", - "print(classification_report(y_test, y_pred))\n", - "macro_f1 = f1_score(y_test, y_pred, average='macro')\n", - "print(f\"Macro-average F1 Score: {macro_f1}\")\n", - "kappa = cohen_kappa_score(y_test, y_pred)\n", - "print(f\"Cohen's Kappa: {kappa}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "YGWlxbazIfMU" - }, - "outputs": [], - "source": [ - "# KNN\n", - "\n", - "# Hyperparameter for KNN\n", - "param_knn = {\n", - " 'n_neighbors': [3, 5, 7, 10, 15],\n", - " 'weights': ['uniform', 'distance'],\n", - " 'metric' : ['minkowski','euclidean','manhattan']\n", - "}\n", - "\n", - "# RandomizedSearchCV for KNN\n", - "knn_search = RandomizedSearchCV(\n", - " estimator=KNeighborsClassifier(),\n", - " param_distributions=param_knn,\n", - " n_iter=5,\n", - " cv=5,\n", - " verbose=2,\n", - " random_state=42,\n", - " n_jobs=-1\n", - ")\n", - "\n", - "# Fit to the training data\n", - "knn_search.fit(X_train, y_train)\n", - "\n", - "# Retrieve the best parameters and score for KNN\n", - "best_params_knn = knn_search.best_params_\n", - "best_score_knn = knn_search.best_score_\n", - "\n", - "# Output the results for KNN\n", - "print(f\"Best parameters for KNN: {best_params_knn}\")\n", - "print(f\"Best cross-validated score for KNN: {best_score_knn}\")\n", - "\n", - "# Train KNN with the best parameters\n", - "knn_classifier = KNeighborsClassifier(**best_params_knn)\n", - "\n", - "# Without Hyperparameter Tuning\n", - "#knn_classifier = KNeighborsClassifier()\n", - "knn_classifier.fit(X_train, y_train)\n", - "\n", - "# Extrahieren der Feature Importance\n", - "feature_importances = rf_classifier.feature_importances_\n", - "features_df = pd.DataFrame({\n", - " 'Feature': X.columns,\n", - " 'Importance': feature_importances\n", 
- "}).sort_values(by='Importance', ascending=False)\n", - "print(features_df)\n", - "\n", - "\n", - "# Predict and evaluate KNN\n", - "y_pred_knn = knn_classifier.predict(X_test)\n", - "print(\"Classification report for KNeighborsClassifier:\")\n", - "print(classification_report(y_test, y_pred_knn))\n", - "\n", - "macro_f1 = f1_score(y_test, y_pred, average='macro')\n", - "print(f\"Macro-average F1 Score: {macro_f1}\")\n", - "\n", - "kappa_knn = cohen_kappa_score(y_test, y_pred_knn)\n", - "print(f\"Cohen's Kappa for KNN: {kappa_knn}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gTAMUQw8sWIl" - }, - "source": [ - "# Deep Learning" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "vgxUAedeRIkl" - }, - "outputs": [], - "source": [ - "# CNN\n", - "\n", - "print(\"Unique labels in training set:\", np.unique(y_train))\n", - "print(\"Unique labels in test set:\", np.unique(y_test))\n", - "\n", - "\n", - "def build_model(hp):\n", - " model = Sequential()\n", - " hp_filters = hp.Int('filters', min_value=32, max_value=128, step=32)\n", - " hp_choice = hp.Choice('kernel_size', values=[3, 5])\n", - " model.add(Conv1D(filters=hp_filters,\n", - " kernel_size=hp_choice,\n", - " activation='relu',\n", - " input_shape=(amount_trained_features, 1)))\n", - " model.add(MaxPooling1D(2))\n", - " model.add(Flatten())\n", - " hp_unit_filter = hp.Int('units', min_value=64, max_value=128, step=32)\n", - " model.add(Dense(units=hp_unit_filter, activation='relu'))\n", - " model.add(Dense(len(np.unique(y)), activation='softmax'))\n", - "\n", - " hp_learning_rate = hp.Choice('learning_rate', values = [1e-2, 1e-3])\n", - " opt = Adam(learning_rate=hp_learning_rate)\n", - "\n", - " model.compile(optimizer=opt,loss='sparse_categorical_crossentropy',\n", - " metrics=['accuracy'])\n", - " return model\n", - "\n", - "tuner = RandomSearch(\n", - " hypermodel=build_model,\n", - " objective='val_accuracy',\n", - " max_trials=4,\n", - " executions_per_trial=1\n", - ")\n", - "\n", - "tuner.search_space_summary()\n", - "\n", - "# Start the search and get the best model\n", - "tuner.search(X_train, y_train, epochs=10, validation_split=0.2, batch_size=62)\n", - "best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]\n", - "\n", - "# Build the model with the best hyperparameters\n", - "model_cnn = build_model(best_hps)\n", - "model_cnn.fit(X_train, y_train, epochs=10, batch_size=62)\n", - "\n", - "# Make predictions\n", - "y_pred_probs = model_cnn.predict(X_test)\n", - "y_pred_classes = np.argmax(y_pred_probs, axis=1)\n", - "\n", - "test_loss, test_acc = model_cnn.evaluate(X_test, y_test, verbose=2)\n", - "\n", - "# Evaluation\n", - "kappa = cohen_kappa_score(y_test, y_pred_classes)\n", - "print(f\"Cohen's Kappa: {kappa}\")\n", - "\n", - "y_pred_labels = np.argmax(y_pred_probs, axis=1)\n", - "recall = recall_score(y_test, y_pred_labels, average='weighted')\n", - "print('Recall: %f' % recall)\n", - "\n", - "f1 = f1_score(y_test, y_pred_labels, average='weighted')\n", - "print('F1 score: %f' % f1)\n", - "\n", - "target_names = [str(name) for name in stage_encoder.inverse_transform([i for i in range(len(stage_encoder.classes_))])]\n", - "\n", - "print(classification_report(y_test, y_pred_classes, target_names=target_names))\n", - "\n", - "macro_f1 = f1_score(y_test, y_pred_classes, average='macro')\n", - "print(f\"Macro-average F1 Score: {macro_f1}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "v6LAuRFgW5Kh", - "colab": { - 
"base_uri": "https://localhost:8080/" - }, - "outputId": "f3f1f4d8-6b61-4270-9056-f810d97ae37c" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Reloading Tuner from ./untitled_project/tuner0.json\n", - "Best Hyperparameters <keras_tuner.src.engine.hyperparameters.hyperparameters.HyperParameters object at 0x7bcfdcb078b0>\n", - "Epoch 1/20\n", - "24943/24943 [==============================] - 178s 7ms/step - loss: 1.1905 - accuracy: 0.4265 - val_loss: 1.1850 - val_accuracy: 0.4321\n", - "Epoch 2/20\n", - "24943/24943 [==============================] - 167s 7ms/step - loss: 1.1816 - accuracy: 0.4326 - val_loss: 1.1785 - val_accuracy: 0.4352\n", - "Epoch 3/20\n", - "24943/24943 [==============================] - 179s 7ms/step - loss: 1.1789 - accuracy: 0.4341 - val_loss: 1.1769 - val_accuracy: 0.4356\n", - "Epoch 4/20\n", - "24943/24943 [==============================] - 173s 7ms/step - loss: 1.1771 - accuracy: 0.4354 - val_loss: 1.1745 - val_accuracy: 0.4384\n", - "Epoch 5/20\n", - "24943/24943 [==============================] - 178s 7ms/step - loss: 1.1761 - accuracy: 0.4360 - val_loss: 1.1758 - val_accuracy: 0.4359\n", - "Epoch 6/20\n", - "24943/24943 [==============================] - 174s 7ms/step - loss: 1.1752 - accuracy: 0.4364 - val_loss: 1.1764 - val_accuracy: 0.4375\n", - "Epoch 7/20\n", - "24943/24943 [==============================] - 168s 7ms/step - loss: 1.1746 - accuracy: 0.4370 - val_loss: 1.1735 - val_accuracy: 0.4356\n", - "Epoch 8/20\n", - "24943/24943 [==============================] - 166s 7ms/step - loss: 1.1742 - accuracy: 0.4374 - val_loss: 1.1729 - val_accuracy: 0.4382\n", - "Epoch 9/20\n", - "24943/24943 [==============================] - 173s 7ms/step - loss: 1.1736 - accuracy: 0.4377 - val_loss: 1.1766 - val_accuracy: 0.4369\n", - "Epoch 10/20\n", - "24943/24943 [==============================] - 180s 7ms/step - loss: 1.1732 - accuracy: 0.4381 - val_loss: 1.1713 - val_accuracy: 0.4394\n", - "Epoch 11/20\n", - "24943/24943 [==============================] - 202s 8ms/step - loss: 1.1732 - accuracy: 0.4383 - val_loss: 1.1710 - val_accuracy: 0.4398\n", - "Epoch 12/20\n", - "24943/24943 [==============================] - 167s 7ms/step - loss: 1.1729 - accuracy: 0.4384 - val_loss: 1.1740 - val_accuracy: 0.4380\n", - "Epoch 13/20\n", - "24943/24943 [==============================] - 168s 7ms/step - loss: 1.1729 - accuracy: 0.4386 - val_loss: 1.1716 - val_accuracy: 0.4379\n", - "Epoch 14/20\n", - "24943/24943 [==============================] - 170s 7ms/step - loss: 1.1726 - accuracy: 0.4384 - val_loss: 1.1749 - val_accuracy: 0.4378\n", - "Epoch 15/20\n", - "24943/24943 [==============================] - 167s 7ms/step - loss: 1.1727 - accuracy: 0.4390 - val_loss: 1.1792 - val_accuracy: 0.4383\n", - "Epoch 16/20\n", - "24943/24943 [==============================] - 167s 7ms/step - loss: 1.1736 - accuracy: 0.4387 - val_loss: 1.1728 - val_accuracy: 0.4387\n", - "Epoch 17/20\n", - "24943/24943 [==============================] - 171s 7ms/step - loss: 1.1723 - accuracy: 0.4391 - val_loss: 1.1709 - val_accuracy: 0.4399\n", - "Epoch 18/20\n", - "24943/24943 [==============================] - 172s 7ms/step - loss: 1.1723 - accuracy: 0.4393 - val_loss: 1.1723 - val_accuracy: 0.4401\n", - "Epoch 19/20\n", - "24943/24943 [==============================] - 172s 7ms/step - loss: 1.1985 - accuracy: 0.4373 - val_loss: 1.1708 - val_accuracy: 0.4393\n", - "Epoch 20/20\n", - "24943/24943 [==============================] - 168s 7ms/step - loss: 1.1884 - 
-      " Feature Importance\n",
-      "1 std 0.249967\n",
-      "4 kurtosis 0.208756\n",
-      "2 mean 0.189652\n",
-      "5 skewness 0.175992\n",
-      "3 variance 0.169360\n",
-      "0 signalName 0.006272\n",
-      "15589/15589 [==============================] - 37s 2ms/step\n",
-      "15589/15589 - 32s - loss: 1.1777 - accuracy: 0.4358 - 32s/epoch - 2ms/step\n",
-      "Cohen's Kappa: 0.2478535374116223\n",
-      "Recall: 0.435759\n",
-      "F1 score: 0.412573\n",
-      " precision recall f1-score support\n",
-      "\n",
-      " awake 0.78 0.50 0.61 124789\n",
-      " deep_sleep 0.39 0.64 0.49 124715\n",
-      " lite_sleep 0.34 0.09 0.14 124949\n",
-      " rem_sleep 0.35 0.51 0.42 124391\n",
-      "\n",
-      " accuracy 0.44 498844\n",
-      " macro avg 0.46 0.44 0.41 498844\n",
-      "weighted avg 0.46 0.44 0.41 498844\n",
-      "\n",
-      "Macro-average F1 Score: 0.4126748745582194\n"
-     ]
-    },
-    {
-     "output_type": "execute_result",
-     "data": {
-      "text/plain": [
-       "\"\\nCohen's Kappa: 0.2796082316913435\\nRecall: 0.459339\\nF1 score: 0.447196\\n precision recall f1-score support\\n\\n awake 0.86 0.54 0.66 52649\\n deep_sleep 0.38 0.75 0.50 52242\\n lite_sleep 0.36 0.17 0.23 52690\\n rem_sleep 0.41 0.38 0.40 52791\\n\\n accuracy 0.46 210372\\n macro avg 0.50 0.46 0.45 210372\\nweighted avg 0.50 0.46 0.45 210372\\n\\nUndersample\\nCohen's Kappa: 0.25736372829306753\\nRecall: 0.443274\\nF1 score: 0.441652\\n precision recall f1-score support\\n\\n awake 0.85 0.49 0.62 27280\\n deep_sleep 0.38 0.66 0.49 27386\\n lite_sleep 0.33 0.22 0.26 27088\\n rem_sleep 0.39 0.41 0.40 27384\\n\\n accuracy 0.44 109138\\n macro avg 0.49 0.44 0.44 109138\\nweighted avg 0.49 0.44 0.44 109138\\n\\n\\nFeature Importance\\n1 std 0.387214\\n2 mean 0.369977\\n3 maxA 0.228155\\n0 signalName 0.014655\\n3411/3411 [==============================] - 8s 2ms/step\\n3411/3411 - 8s - loss: 1.1881 - accuracy: 0.4272 - 8s/epoch - 2ms/step\\nCohen's Kappa: 0.2356531000090908\\nRecall: 0.427230\\nF1 score: 0.400917\\n precision recall f1-score support\\n\\n awake 0.87 0.46 0.60 27280\\n deep_sleep 0.37 0.70 0.49 27386\\n lite_sleep 0.28 0.06 0.10 27088\\n rem_sleep 0.36 0.48 0.41 27384\\n\\n accuracy 0.43 109138\\n macro avg 0.47 0.43 0.40 109138\\nweighted avg 0.47 0.43 0.40 109138\\n\\nMacro-average F1 Score: 0.40030597312739447\\n\\n\\n\\n\\n Feature Importance\\n1 std 0.229538\\n0 signalName 0.179083\\n4 kurtosis 0.165762\\n3 variance 0.152368\\n2 mean 0.140518\\n5 skewness 0.132731\\n6822/6822 [==============================] - 23s 3ms/step\\n6822/6822 - 18s - loss: 1.2765 - accuracy: 0.3565 - 18s/epoch - 3ms/step\\nCohen's Kappa: 0.14185713040600867\\nRecall: 0.356494\\nF1 score: 0.350898\\n precision recall f1-score support\\n\\n awake 0.43 0.52 0.47 54553\\n deep_sleep 0.36 0.34 0.35 54210\\n lite_sleep 0.28 0.35 0.31 54922\\n rem_sleep 0.36 0.22 0.27 54591\\n\\n accuracy 0.36 218276\\n macro avg 0.36 0.36 0.35 218276\\nweighted avg 0.36 0.36 0.35 218276\\n\\nMacro-average F1 Score: 0.3509835323216871\\n\\n\\nWITHOUT POS & ABDO\\nFeature Importance\\n1 std 0.317718\\n2 mean 0.260348\\n3 variance 0.166735\\n4 kurtosis 0.133296\\n5 skewness 0.105235\\n0 signalName 0.016668\\n3411/3411 [==============================] - 12s 3ms/step\\n3411/3411 - 10s - loss: 1.1958 - accuracy: 0.4254 - 10s/epoch - 3ms/step\\nCohen's Kappa: 0.23302260719925538\\nRecall: 0.425370\\nF1 score: 0.377417\\n precision recall f1-score support\\n\\n awake 0.84 0.47 0.60 27280\\n deep_sleep 0.38 0.67 0.48 27386\\n lite_sleep 0.31 0.00 0.00 27088\\n rem_sleep 0.34 0.56 0.42 27384\\n\\n accuracy 0.43 109138\\n macro avg 0.47 0.42 0.38 109138\\nweighted avg 0.47 0.43 0.38 109138\\n\\nMacro-average F1 Score: 0.3766119137787486\\n\""
-      ],
-      "application/vnd.google.colaboratory.intrinsic+json": {
-       "type": "string"
-      }
-     },
-     "metadata": {},
-     "execution_count": 8
-    }
-   ],
-   "source": [
-    "# LSTM\n",
-    "\n",
-    "def build_model(hp):\n",
-    "    model = Sequential([\n",
-    "        LSTM(hp.Int('units', min_value=32, max_value=128, step=32),\n",
-    "             activation='relu',\n",
-    "             input_shape=(amount_trained_features, 1)),\n",
-    "        Dense(len(np.unique(y)), activation='softmax')\n",
-    "    ])\n",
-    "\n",
-    "    model.compile(optimizer=Adam(hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),\n",
-    "                  loss='sparse_categorical_crossentropy',\n",
-    "                  metrics=['accuracy'])\n",
-    "    return model\n",
-    "\n",
-    "tuner = RandomSearch(\n",
-    "    hypermodel=build_model,\n",
-    "    objective='val_accuracy',\n",
-    "    max_trials=4,\n",
-    "    executions_per_trial=1,\n",
-    "    project_name='lstm_tuning'  # separate project; with the default, this tuner reloaded the CNN trials (see the output above)\n",
-    ")\n",
-    "\n",
-    "tuner.search(X_train, y_train, epochs=10, validation_split=0.2, batch_size=62)\n",
-    "\n",
-    "# Get the best hyperparameters\n",
-    "best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]\n",
-    "print(f\"Best Hyperparameters {best_hps}\")\n",
-    "\n",
-    "# Build the model with the best hyperparameters and train it on the data\n",
-    "model = tuner.hypermodel.build(best_hps)\n",
-    "history = model.fit(X_train, y_train, epochs=20, batch_size=64, validation_split=0.2)\n",
-    "\n",
-    "# Feature importances (taken from the Random Forest trained above)\n",
-    "feature_importances = rf_classifier.feature_importances_\n",
-    "features_df = pd.DataFrame({\n",
-    "    'Feature': X.columns,\n",
-    "    'Importance': feature_importances\n",
-    "}).sort_values(by='Importance', ascending=False)\n",
-    "print(features_df)\n",
-    "\n",
-    "\n",
-    "\n",
-    "y_pred_probs = model.predict(X_test)\n",
-    "y_pred_classes = np.argmax(y_pred_probs, axis=1)\n",
-    "\n",
-    "test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)\n",
-    "\n",
-    "# Evaluation\n",
-    "kappa = cohen_kappa_score(y_test, y_pred_classes)\n",
-    "print(f\"Cohen's Kappa: {kappa}\")\n",
-    "\n",
-    "y_pred_labels = np.argmax(y_pred_probs, axis=1)\n",
-    "recall = recall_score(y_test, y_pred_labels, average='weighted')\n",
-    "print('Recall: %f' % recall)\n",
-    "\n",
-    "f1 = f1_score(y_test, y_pred_labels, average='weighted')\n",
-    "print('F1 score: %f' % f1)\n",
-    "\n",
-    "target_names = [str(name) for name in stage_encoder.inverse_transform([i for i in range(len(stage_encoder.classes_))])]\n",
-    "\n",
-    "print(classification_report(y_test, y_pred_classes, target_names=target_names))\n",
-    "macro_f1 = f1_score(y_test, y_pred_classes, average='macro')\n",
-    "print(f\"Macro-average F1 Score: {macro_f1}\")"
-   ]
-  }
- ],
- "metadata": {
-  "colab": {
-   "collapsed_sections": [
-    "L300ySZBqbyH",
-    "D8J3VziBqm-p",
-    "VdTlPBDxrJYL",
-    "24EeYQ3DrNrt",
-    "DD5MaGB4rUKv"
-   ],
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3",
-   "name": "python3"
-  },
-  "language_info": {
-   "name": "python"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
\ No newline at end of file