diff --git a/Tourism/Prediction cancellation of hotel bookings/notebook.ipynb b/Tourism/Prediction cancellation of hotel bookings/notebook.ipynb index 943859f408d989e72c9f0567da5921843b2e9c2f..d4095e82608932e6c61ded9b2553f2f72839d99d 100644 --- a/Tourism/Prediction cancellation of hotel bookings/notebook.ipynb +++ b/Tourism/Prediction cancellation of hotel bookings/notebook.ipynb @@ -5,6 +5,7 @@ "metadata": { "editable": true, "include": true, + "paragraph": "Geschäftsverständnis", "slideshow": { "slide_type": "" }, @@ -68,13 +69,12 @@ "cell_type": "markdown", "metadata": { "editable": true, - "jp-MarkdownHeadingCollapsed": true, + "include": true, + "paragraph": "Import von Relevant Module", "slideshow": { "slide_type": "" }, - "tags": [ - "active_ipynb" - ] + "tags": [] }, "source": [ "# 2.1. Import von Relevant Module" @@ -83,7 +83,14 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "import pandas as pd\n", @@ -102,7 +109,15 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "paragraph": "Daten Auslesen", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "## 2.2. Read Data" ] @@ -110,7 +125,14 @@ { "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [ { "data": { @@ -345,7 +367,14 @@ { "cell_type": "code", "execution_count": 4, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "def attribute_description(data):\n", @@ -370,7 +399,14 @@ { "cell_type": "code", "execution_count": 5, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [ { "name": "stdout", @@ -418,7 +454,14 @@ { "cell_type": "code", "execution_count": 6, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [ { "data": { @@ -840,7 +883,15 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "paragraph": "Daten Vorbereitung", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "## 2.3. Data Cleaning" ] @@ -848,7 +899,14 @@ { "cell_type": "code", "execution_count": 7, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [ { "data": { @@ -875,7 +933,14 @@ { "cell_type": "code", "execution_count": 8, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [ { "data": { @@ -927,7 +992,14 @@ { "cell_type": "code", "execution_count": 9, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "df = df.drop(['reservation_status'], axis=1)" @@ -936,7 +1008,14 @@ { "cell_type": "code", "execution_count": 10, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "df = df.drop(['stays_in_weekend_nights'], axis=1)" @@ -945,7 +1024,14 @@ { "cell_type": "code", "execution_count": 11, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "df = df.drop(['reservation_status_date'], axis=1)" @@ -954,7 +1040,14 @@ { "cell_type": "code", "execution_count": 12, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "df = df.drop(['arrival_date_day_of_month'], axis=1)" @@ -963,7 +1056,14 @@ { "cell_type": "code", "execution_count": 13, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "df = df.drop(['arrival_date_year'], axis=1)" @@ -972,7 +1072,14 @@ { "cell_type": "code", "execution_count": 14, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "df = df.drop(['arrival_date_month'], axis=1)" @@ -981,7 +1088,14 @@ { "cell_type": "code", "execution_count": 15, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "df = df.drop(['arrival_date_week_number'], axis=1)" @@ -990,7 +1104,14 @@ { "cell_type": "code", "execution_count": 16, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "df = df.drop(['required_car_parking_spaces'], axis=1)" @@ -999,7 +1120,14 @@ { "cell_type": "code", "execution_count": 17, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "df = df.drop(['previous_bookings_not_canceled'], axis=1)" @@ -1008,7 +1136,14 @@ { "cell_type": "code", "execution_count": 18, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "df = df.drop(['total_of_special_requests'], axis=1)" @@ -1017,7 +1152,14 @@ { "cell_type": "code", "execution_count": 19, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "df = df.drop(['agent'], axis=1)" @@ -1026,7 +1168,14 @@ { "cell_type": "code", "execution_count": 20, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "df = df.drop(['company'], axis=1)" @@ -1035,7 +1184,14 @@ { "cell_type": "code", "execution_count": 21, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "df = df.drop(['adr'], axis=1)" @@ -1044,7 +1200,14 @@ { "cell_type": "code", "execution_count": 22, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "df = df.dropna(axis=0)" @@ -1052,7 +1215,15 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "paragraph": "Test auf Multikollinearität", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "## 2.4. Test for Multicollinearity" ] @@ -1060,7 +1231,14 @@ { "cell_type": "code", "execution_count": 23, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "from statsmodels.stats.outliers_influence import variance_inflation_factor\n", @@ -1073,7 +1251,14 @@ { "cell_type": "code", "execution_count": 24, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [ { "data": { @@ -1174,7 +1359,15 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "paragraph": "Deskriptiv Analyse", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "## 2.5. Descriptive Analysis" ] @@ -1182,7 +1375,14 @@ { "cell_type": "code", "execution_count": 25, - "metadata": {}, + "metadata": { + "editable": true, + "include": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [ { "data": { @@ -1228,6 +1428,7 @@ "metadata": { "editable": true, "include": true, + "paragraph": "Datenaufbereitung", "slideshow": { "slide_type": "" }, @@ -1260,6 +1461,7 @@ "metadata": { "editable": true, "include": true, + "paragraph": "Erfassung kategorialer Variablen", "slideshow": { "slide_type": "" }, @@ -1278,9 +1480,7 @@ "slideshow": { "slide_type": "" }, - "tags": [ - "active_ipynb" - ] + "tags": [] }, "outputs": [ { @@ -1600,6 +1800,7 @@ "metadata": { "editable": true, "include": true, + "paragraph": "Modellierung und Auswertung", "slideshow": { "slide_type": "" }, @@ -1639,6 +1840,7 @@ "metadata": { "editable": true, "include": true, + "paragraph": "Test- und Trainingsdaten", "slideshow": { "slide_type": "" }, @@ -1657,9 +1859,7 @@ "slideshow": { "slide_type": "" }, - "tags": [ - "active_ipynb" - ] + "tags": [] }, "outputs": [], "source": [ @@ -1927,6 +2127,7 @@ "metadata": { "editable": true, "include": true, + "paragraph": "DecisionTree", "slideshow": { "slide_type": "" }, @@ -1975,9 +2176,7 @@ "slideshow": { "slide_type": "" }, - "tags": [ - "active_ipynb" - ] + "tags": [] }, "outputs": [ { @@ -2066,6 +2265,7 @@ "metadata": { "editable": true, "include": true, + "paragraph": "Logistik Regression", "slideshow": { "slide_type": "" },