"print(\"Unique values in y after encoding:\", np.unique(y))\n",
"\n",
"\n",
"over = SMOTE(sampling_strategy=\"not majority\") # Oversample only the minority class / The number of samples in the different classes will be equalized\n",
"under = RandomUnderSampler(sampling_strategy='not minority') # Undersample only the majority class\n",
"steps = [('o', over), ('u', under)]\n",
"pipeline = Pipeline(steps=steps)\n",
"X, y = over.fit_resample(X, y) # ONLY USE OVERSMPLE\n",
"counter = Counter(y)\n",
"print(counter)\n",
"\n",
"\n",
"# Plotting the class distribution\n",
"plt.bar(counter.keys(), counter.values())\n",
"plt.xlabel('Class')\n",
"plt.ylabel('Amount of entries')\n",
"plt.title('Resampled Class Distribution')\n",
"plt.show()\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"