From 218d3e55153d44621b1df061d96a45ade70b55c2 Mon Sep 17 00:00:00 2001
From: Zacharias Sitter <zacharias.sitter@student.reutlingen-university.de>
Date: Sat, 14 Jan 2023 11:42:47 +0000
Subject: [PATCH] Replace DBE_annual-project.ipynb

---
 DBE_annual-project.ipynb | 119 +++------------------------------------
 1 file changed, 9 insertions(+), 110 deletions(-)

diff --git a/DBE_annual-project.ipynb b/DBE_annual-project.ipynb
index f7167f3..6877a23 100644
--- a/DBE_annual-project.ipynb
+++ b/DBE_annual-project.ipynb
@@ -69,26 +69,10 @@
         "!python -m spacy init fill-config /content/DBE_annual-project/base_config.cfg /content/DBE_annual-project/base_config_spacy.cfg\n"
       ],
       "metadata": {
-        "id": "PcgBSkHnitiO",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "e935551e-cfcc-41f9-a7f2-9c758a609ede"
+        "id": "PcgBSkHnitiO"
       },
-      "execution_count": 3,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "\u001b[38;5;2m✔ Auto-filled config with all values\u001b[0m\n",
-            "\u001b[38;5;2m✔ Saved config\u001b[0m\n",
-            "/content/DBE_annual-project/base_config_spacy.cfg\n",
-            "You can now add your data and train your pipeline:\n",
-            "python -m spacy train base_config_spacy.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy\n"
-          ]
-        }
-      ]
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "code",
@@ -96,57 +80,10 @@
         "!python -m spacy debug data /content/DBE_annual-project/base_config_spacy.cfg"
       ],
       "metadata": {
-        "id": "SqDuveAaHzFp",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "8d3ffd0d-9aed-468f-dfdd-ccf70de356cb"
+        "id": "SqDuveAaHzFp"
       },
-      "execution_count": 4,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "\u001b[1m\n",
-            "============================ Data file validation ============================\u001b[0m\n",
-            "Downloading: 100% 481/481 [00:00<00:00, 441kB/s]\n",
-            "Downloading: 100% 899k/899k [00:01<00:00, 693kB/s]\n",
-            "Downloading: 100% 456k/456k [00:01<00:00, 422kB/s]\n",
-            "Downloading: 100% 1.36M/1.36M [00:01<00:00, 1.03MB/s]\n",
-            "Downloading: 100% 501M/501M [00:12<00:00, 40.1MB/s]\n",
-            "Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias']\n",
-            "- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
-            "- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
-            "\u001b[38;5;2m✔ Pipeline can be initialized with data\u001b[0m\n",
-            "\u001b[38;5;2m✔ Corpus is loadable\u001b[0m\n",
-            "\u001b[1m\n",
-            "=============================== Training stats ===============================\u001b[0m\n",
-            "Language: en\n",
-            "Training pipeline: transformer, ner\n",
-            "150 training docs\n",
-            "1 evaluation docs\n",
-            "\u001b[38;5;2m✔ No overlap between training and evaluation data\u001b[0m\n",
-            "\u001b[38;5;3m⚠ Low number of examples to train a new pipeline (150)\u001b[0m\n",
-            "\u001b[1m\n",
-            "============================== Vocab & Vectors ==============================\u001b[0m\n",
-            "\u001b[38;5;4mℹ 5235 total word(s) in the data (1424 unique)\u001b[0m\n",
-            "\u001b[38;5;4mℹ No word vectors present in the package\u001b[0m\n",
-            "\u001b[1m\n",
-            "========================== Named Entity Recognition ==========================\u001b[0m\n",
-            "\u001b[38;5;4mℹ 1 label(s)\u001b[0m\n",
-            "0 missing value(s) (tokens with '-' label)\n",
-            "\u001b[38;5;2m✔ Good amount of examples for all labels\u001b[0m\n",
-            "\u001b[38;5;2m✔ Examples without occurrences available for all labels\u001b[0m\n",
-            "\u001b[38;5;2m✔ No entities consisting of or starting/ending with whitespace\u001b[0m\n",
-            "\u001b[38;5;2m✔ No entities crossing sentence boundaries\u001b[0m\n",
-            "\u001b[1m\n",
-            "================================== Summary ==================================\u001b[0m\n",
-            "\u001b[38;5;2m✔ 7 checks passed\u001b[0m\n",
-            "\u001b[38;5;3m⚠ 1 warning\u001b[0m\n"
-          ]
-        }
-      ]
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "code",
@@ -154,48 +91,10 @@
         "!python -m spacy train /content/DBE_annual-project/base_config_spacy.cfg --gpu-id 0 --paths.train /content/DBE_annual-project/train.spacy --paths.dev /content/DBE_annual-project/dev.spacy --output ./"
       ],
       "metadata": {
-        "id": "VLxW_Bcci9N1",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "e6f96b52-8714-475b-b45e-c5c29a06fd8f"
+        "id": "VLxW_Bcci9N1"
       },
-      "execution_count": 5,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "\u001b[38;5;4mℹ Saving to output directory: .\u001b[0m\n",
-            "\u001b[38;5;4mℹ Using GPU: 0\u001b[0m\n",
-            "\u001b[1m\n",
-            "=========================== Initializing pipeline ===========================\u001b[0m\n",
-            "[2023-01-14 11:33:26,302] [INFO] Set up nlp object from config\n",
-            "INFO:spacy:Set up nlp object from config\n",
-            "[2023-01-14 11:33:26,312] [INFO] Pipeline: ['transformer', 'ner']\n",
-            "INFO:spacy:Pipeline: ['transformer', 'ner']\n",
-            "[2023-01-14 11:33:26,316] [INFO] Created vocabulary\n",
-            "INFO:spacy:Created vocabulary\n",
-            "[2023-01-14 11:33:26,316] [INFO] Finished initializing nlp object\n",
-            "INFO:spacy:Finished initializing nlp object\n",
-            "Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight']\n",
-            "- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
-            "- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
-            "[2023-01-14 11:33:43,715] [INFO] Initialized pipeline components: ['transformer', 'ner']\n",
-            "INFO:spacy:Initialized pipeline components: ['transformer', 'ner']\n",
-            "\u001b[38;5;2m✔ Initialized pipeline\u001b[0m\n",
-            "\u001b[1m\n",
-            "============================= Training pipeline =============================\u001b[0m\n",
-            "\u001b[38;5;4mℹ Pipeline: ['transformer', 'ner']\u001b[0m\n",
-            "\u001b[38;5;4mℹ Initial learn rate: 0.0\u001b[0m\n",
-            "E    #       LOSS TRANS...  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE \n",
-            "---  ------  -------------  --------  ------  ------  ------  ------\n",
-            "  0       0        6733.00    778.82    0.00    0.00    0.00    0.00\n",
-            "\n",
-            "Aborted!\n"
-          ]
-        }
-      ]
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "code",
-- 
GitLab
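Note: the three cells whose recorded outputs are cleared above drive spaCy's
standard config-based training workflow: init fill-config completes a partial
config with defaults, debug data validates the corpora against it, and train
fits the transformer+ner pipeline. A minimal sketch of the same steps outside
Colab follows (the local paths and the output directory name are assumptions,
not taken from the repo; --gpu-id 0 presumes a visible CUDA GPU, and
train.spacy/dev.spacy are pre-serialized DocBin corpora):

    # Complete a partial config with spaCy's defaults (first cell)
    python -m spacy init fill-config base_config.cfg base_config_spacy.cfg

    # Sanity-check the training/eval corpora against the filled config
    # (second cell; --paths.* are CLI config overrides)
    python -m spacy debug data base_config_spacy.cfg \
        --paths.train ./train.spacy --paths.dev ./dev.spacy

    # Train on GPU 0; spaCy writes model-best/ and model-last/ to --output
    # ("./output" here is a hypothetical directory, the notebook uses "./")
    python -m spacy train base_config_spacy.cfg --gpu-id 0 \
        --paths.train ./train.spacy --paths.dev ./dev.spacy --output ./output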