From 218d3e55153d44621b1df061d96a45ade70b55c2 Mon Sep 17 00:00:00 2001
From: Zacharias Sitter <zacharias.sitter@student.reutlingen-university.de>
Date: Sat, 14 Jan 2023 11:42:47 +0000
Subject: [PATCH] Replace DBE_annual-project.ipynb

---
 DBE_annual-project.ipynb | 119 +++------------------------------------
 1 file changed, 9 insertions(+), 110 deletions(-)

diff --git a/DBE_annual-project.ipynb b/DBE_annual-project.ipynb
index f7167f3..6877a23 100644
--- a/DBE_annual-project.ipynb
+++ b/DBE_annual-project.ipynb
@@ -69,26 +69,10 @@
         "!python -m spacy init fill-config /content/DBE_annual-project/base_config.cfg /content/DBE_annual-project/base_config_spacy.cfg\n"
       ],
       "metadata": {
-        "id": "PcgBSkHnitiO",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "e935551e-cfcc-41f9-a7f2-9c758a609ede"
+        "id": "PcgBSkHnitiO"
       },
-      "execution_count": 3,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "\u001b[38;5;2m✔ Auto-filled config with all values\u001b[0m\n",
-            "\u001b[38;5;2m✔ Saved config\u001b[0m\n",
-            "/content/DBE_annual-project/base_config_spacy.cfg\n",
-            "You can now add your data and train your pipeline:\n",
-            "python -m spacy train base_config_spacy.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy\n"
-          ]
-        }
-      ]
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "code",
@@ -96,57 +80,10 @@
         "!python -m spacy debug data /content/DBE_annual-project/base_config_spacy.cfg"
       ],
       "metadata": {
-        "id": "SqDuveAaHzFp",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "8d3ffd0d-9aed-468f-dfdd-ccf70de356cb"
+        "id": "SqDuveAaHzFp"
       },
-      "execution_count": 4,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "\u001b[1m\n",
-            "============================ Data file validation ============================\u001b[0m\n",
-            "Downloading: 100% 481/481 [00:00<00:00, 441kB/s]\n",
-            "Downloading: 100% 899k/899k [00:01<00:00, 693kB/s]\n",
-            "Downloading: 100% 456k/456k [00:01<00:00, 422kB/s]\n",
-            "Downloading: 100% 1.36M/1.36M [00:01<00:00, 1.03MB/s]\n",
-            "Downloading: 100% 501M/501M [00:12<00:00, 40.1MB/s]\n",
-            "Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias']\n",
-            "- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
-            "- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
-            "\u001b[38;5;2m✔ Pipeline can be initialized with data\u001b[0m\n",
-            "\u001b[38;5;2m✔ Corpus is loadable\u001b[0m\n",
-            "\u001b[1m\n",
-            "=============================== Training stats ===============================\u001b[0m\n",
-            "Language: en\n",
-            "Training pipeline: transformer, ner\n",
-            "150 training docs\n",
-            "1 evaluation docs\n",
-            "\u001b[38;5;2m✔ No overlap between training and evaluation data\u001b[0m\n",
-            "\u001b[38;5;3m⚠ Low number of examples to train a new pipeline (150)\u001b[0m\n",
-            "\u001b[1m\n",
-            "============================== Vocab & Vectors ==============================\u001b[0m\n",
-            "\u001b[38;5;4mℹ 5235 total word(s) in the data (1424 unique)\u001b[0m\n",
-            "\u001b[38;5;4mℹ No word vectors present in the package\u001b[0m\n",
-            "\u001b[1m\n",
-            "========================== Named Entity Recognition ==========================\u001b[0m\n",
-            "\u001b[38;5;4mℹ 1 label(s)\u001b[0m\n",
-            "0 missing value(s) (tokens with '-' label)\n",
-            "\u001b[38;5;2m✔ Good amount of examples for all labels\u001b[0m\n",
-            "\u001b[38;5;2m✔ Examples without occurrences available for all labels\u001b[0m\n",
-            "\u001b[38;5;2m✔ No entities consisting of or starting/ending with whitespace\u001b[0m\n",
-            "\u001b[38;5;2m✔ No entities crossing sentence boundaries\u001b[0m\n",
-            "\u001b[1m\n",
-            "================================== Summary ==================================\u001b[0m\n",
-            "\u001b[38;5;2m✔ 7 checks passed\u001b[0m\n",
-            "\u001b[38;5;3m⚠ 1 warning\u001b[0m\n"
-          ]
-        }
-      ]
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "code",
@@ -154,48 +91,10 @@
         "!python -m spacy train /content/DBE_annual-project/base_config_spacy.cfg --gpu-id 0 --paths.train /content/DBE_annual-project/train.spacy --paths.dev /content/DBE_annual-project/dev.spacy --output ./"
       ],
       "metadata": {
-        "id": "VLxW_Bcci9N1",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "e6f96b52-8714-475b-b45e-c5c29a06fd8f"
+        "id": "VLxW_Bcci9N1"
       },
-      "execution_count": 5,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "\u001b[38;5;4mℹ Saving to output directory: .\u001b[0m\n",
-            "\u001b[38;5;4mℹ Using GPU: 0\u001b[0m\n",
-            "\u001b[1m\n",
-            "=========================== Initializing pipeline ===========================\u001b[0m\n",
-            "[2023-01-14 11:33:26,302] [INFO] Set up nlp object from config\n",
-            "INFO:spacy:Set up nlp object from config\n",
-            "[2023-01-14 11:33:26,312] [INFO] Pipeline: ['transformer', 'ner']\n",
-            "INFO:spacy:Pipeline: ['transformer', 'ner']\n",
-            "[2023-01-14 11:33:26,316] [INFO] Created vocabulary\n",
-            "INFO:spacy:Created vocabulary\n",
-            "[2023-01-14 11:33:26,316] [INFO] Finished initializing nlp object\n",
-            "INFO:spacy:Finished initializing nlp object\n",
-            "Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight']\n",
-            "- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
-            "- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
-            "[2023-01-14 11:33:43,715] [INFO] Initialized pipeline components: ['transformer', 'ner']\n",
-            "INFO:spacy:Initialized pipeline components: ['transformer', 'ner']\n",
-            "\u001b[38;5;2m✔ Initialized pipeline\u001b[0m\n",
-            "\u001b[1m\n",
-            "============================= Training pipeline =============================\u001b[0m\n",
-            "\u001b[38;5;4mℹ Pipeline: ['transformer', 'ner']\u001b[0m\n",
-            "\u001b[38;5;4mℹ Initial learn rate: 0.0\u001b[0m\n",
-            "E    #       LOSS TRANS...  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE \n",
-            "---  ------  -------------  --------  ------  ------  ------  ------\n",
-            "  0       0        6733.00    778.82    0.00    0.00    0.00    0.00\n",
-            "\n",
-            "Aborted!\n"
-          ]
-        }
-      ]
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "code",
-- 
GitLab