diff --git a/base_config.cfg b/base_config.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..dda5c64575b407b133dc6b83441cdb6424ffea34
--- /dev/null
+++ b/base_config.cfg
@@ -0,0 +1,83 @@
+# This is an auto-generated partial config. To use it with 'spacy train'
+# you can run spacy init fill-config to auto-fill all default settings:
+# python -m spacy init fill-config ./base_config.cfg ./config.cfg
+[paths]
+train = "/content/DBE_annual-project/train.spacy"
+dev = "/content/DBE_annual-project/dev.spacy"
+
+[system]
+gpu_allocator = "pytorch"
+
+[nlp]
+lang = "en"
+pipeline = ["transformer","ner"]
+batch_size = 128
+
+[components]
+
+[components.transformer]
+factory = "transformer"
+
+[components.transformer.model]
+@architectures = "spacy-transformers.TransformerModel.v1"
+name = "roberta-base"
+tokenizer_config = {"use_fast": true}
+
+[components.transformer.model.get_spans]
+@span_getters = "spacy-transformers.strided_spans.v1"
+window = 128
+stride = 96
+
+[components.ner]
+factory = "ner"
+
+[components.ner.model]
+@architectures = "spacy.TransitionBasedParser.v2"
+state_type = "ner"
+extra_state_tokens = false
+hidden_width = 64
+maxout_pieces = 2
+use_upper = false
+nO = null
+
+[components.ner.model.tok2vec]
+@architectures = "spacy-transformers.TransformerListener.v1"
+grad_factor = 1.0
+
+[components.ner.model.tok2vec.pooling]
+@layers = "reduce_mean.v1"
+
+[corpora]
+
+[corpora.train]
+@readers = "spacy.Corpus.v1"
+path = ${paths.train}
+max_length = 500
+
+[corpora.dev]
+@readers = "spacy.Corpus.v1"
+path = ${paths.dev}
+max_length = 0
+
+[training]
+accumulate_gradient = 3
+dev_corpus = "corpora.dev"
+train_corpus = "corpora.train"
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+
+[training.optimizer.learn_rate]
+@schedules = "warmup_linear.v1"
+warmup_steps = 250
+total_steps = 20000
+initial_rate = 5e-5
+
+[training.batcher]
+@batchers = "spacy.batch_by_padded.v1"
+discard_oversize = true
+size = 2000
+buffer = 256
+
+[initialize]
+vectors = null