How to create a spaCy pipeline from a model data path?

I am having trouble creating a pipeline when loading a model data path with the from_disk method of a spacy.lang.en.English object. I have also consulted spaCy's documentation, but I still receive ValueError: Cannot deserialize model: mismatched structure.

from spacy import util
import spacy

nlp = spacy.load('en_core_web_lg')  # loaded here only so its meta can be read below

model = nlp.meta['lang'] + '_' + nlp.meta['name']  # package name, e.g. en_core_web_lg
version = model + '-' + nlp.meta['version']  # versioned data directory name, e.g. en_core_web_lg-3.2.0
path = util.get_package_path(model).as_posix() + '/' + version
# /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/en_core_web_lg/en_core_web_lg-3.2.0

lang = 'en'
cls = util.get_lang_class(lang)
nlp = cls()  # new language object; rebinds nlp, so the pipeline loaded above is no longer used

pipeline = ['tagger', 'parser', 'ner']

for name in pipeline:
    nlp.add_pipe(name)  # NOTE(review): no config is passed; presumably components get default settings, not the trained model's - confirm

nlp.from_disk(path)  # fails here with ValueError: Cannot deserialize model: mismatched structure (see traceback below)

The output:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/Users/khoilr/Code/NLP/nlp.ipynb Cell 2 in <module>
     17 for name in pipeline:
     18     nlp.add_pipe(name)
---> 20 nlp.from_disk(path)

File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/spacy/language.py:2042, in Language.from_disk(self, path, exclude, overrides)
   2039 if not (path / "vocab").exists() and "vocab" not in exclude:  # type: ignore[operator]
   2040     # Convert to list here in case exclude is (default) tuple
   2041     exclude = list(exclude) + ["vocab"]
-> 2042 util.from_disk(path, deserializers, exclude)  # type: ignore[arg-type]
   2043 self._path = path  # type: ignore[assignment]
   2044 self._link_components()

File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/spacy/util.py:1299, in from_disk(path, readers, exclude)
   1296 for key, reader in readers.items():
   1297     # Split to support file names like meta.json
   1298     if key.split(".")[0] not in exclude:
-> 1299         reader(path / key)
   1300 return path

File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/spacy/language.py:2036, in Language.from_disk.<locals>.<lambda>(p, proc)
   2034     if not hasattr(proc, "from_disk"):
   2035         continue
-> 2036     deserializers[name] = lambda p, proc=proc: proc.from_disk(  # type: ignore[misc]
   2037         p, exclude=["vocab"]
   2038     )
   2039 if not (path / "vocab").exists() and "vocab" not in exclude:  # type: ignore[operator]
   2040     # Convert to list here in case exclude is (default) tuple
   2041     exclude = list(exclude) + ["vocab"]

File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/spacy/pipeline/trainable_pipe.pyx:343, in spacy.pipeline.trainable_pipe.TrainablePipe.from_disk()

File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/spacy/util.py:1299, in from_disk(path, readers, exclude)
   1296 for key, reader in readers.items():
   1297     # Split to support file names like meta.json
   1298     if key.split(".")[0] not in exclude:
-> 1299         reader(path / key)
   1300 return path

File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/spacy/pipeline/trainable_pipe.pyx:333, in spacy.pipeline.trainable_pipe.TrainablePipe.from_disk.load_model()

File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/spacy/pipeline/trainable_pipe.pyx:334, in spacy.pipeline.trainable_pipe.TrainablePipe.from_disk.load_model()

File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/thinc/model.py:593, in Model.from_bytes(self, bytes_data)
    591 msg = srsly.msgpack_loads(bytes_data)
    592 msg = convert_recursive(is_xp_array, self.ops.asarray, msg)
--> 593 return self.from_dict(msg)

File /opt/homebrew/Caskroom/mambaforge/base/envs/aio/lib/python3.10/site-packages/thinc/model.py:610, in Model.from_dict(self, msg)
    608 nodes = list(self.walk())
    609 if len(msg["nodes"]) != len(nodes):
--> 610     raise ValueError("Cannot deserialize model: mismatched structure")
    611 for i, node in enumerate(nodes):
    612     info = msg["nodes"][i]

ValueError: Cannot deserialize model: mismatched structure


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source