Can't load pretrained model to generate embeddings

I am using the code below to generate sentence embeddings with the Hugging Face transformers library, but it fails with the error shown further down. I can't seem to resolve this problem. Any pointers will help. Thanks.

from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

#Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    """Mask-aware mean pooling over the token dimension.

    Averages the token embeddings of each sequence, counting only
    positions where the attention mask is 1 (padding is excluded).

    Args:
        model_output: model output whose first element holds the token
            embeddings, shape (batch, seq_len, hidden).
        attention_mask: 0/1 tensor of shape (batch, seq_len).

    Returns:
        Tensor of shape (batch, hidden) with per-sentence mean embeddings.
    """
    embeddings = model_output[0]  # token-level embeddings
    mask = attention_mask.unsqueeze(-1).expand(embeddings.size()).float()
    summed = torch.sum(embeddings * mask, 1)
    # clamp guards against division by zero for all-padding rows
    counts = torch.clamp(mask.sum(1), min=1e-9)
    return summed / counts


# Sentences we want sentence embeddings for
sentences = ['This is an example sentence', 'Each sentence is converted']

# Pull tokenizer and model weights from the HuggingFace Hub
model_name = 'sentence-transformers/all-roberta-large-v1'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Batch-encode the sentences into padded tensors
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')

# Forward pass; no_grad skips building the autograd graph
with torch.no_grad():
    model_output = model(**encoded_input)

# Mask-aware mean pooling over the token dimension
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

# L2-normalize each sentence vector
sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)

print("Sentence embeddings:")
print(sentence_embeddings)
print(torch.__version__)

>> 1.4.0

I am getting the below error.

RuntimeError Traceback (most recent call last) ~\anaconda3\envs\tf_env\lib\site-packages\transformers\modeling_utils.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs) 1434 try: -> 1435 state_dict = torch.load(resolved_archive_file, map_location="cpu") 1436
except Exception as e:

~\anaconda3\envs\tf_env\lib\site-packages\torch\serialization.py in load(f, map_location, pickle_module, **pickle_load_args) 526 if _is_zipfile(opened_file): --> 527 with _open_zipfile_reader(f) as opened_zipfile: 528 return _load(opened_zipfile, map_location, pickle_module, **pickle_load_args)

~\anaconda3\envs\tf_env\lib\site-packages\torch\serialization.py in init(self, name_or_buffer) 223 def init(self, name_or_buffer): --> 224 super(_open_zipfile_reader, self).init(torch._C.PyTorchFileReader(name_or_buffer)) 225

RuntimeError: version_ <= kMaxSupportedFileFormatVersion INTERNAL ASSERT FAILED at ..\caffe2\serialize\inline_container.cc:132, please report a bug to PyTorch. Attempted to read a PyTorch file with version 3, but the maximum supported version for reading is 2. Your PyTorch installation may be too old. (init at ..\caffe2\serialize\inline_container.cc:132) (no backtrace available)

During handling of the above exception, another exception occurred:

MemoryError Traceback (most recent call last) ~\AppData\Local\Temp/ipykernel_31400/3667175826.py in ----> 1 model = AutoModel.from_pretrained('sentence-transformers/all-roberta-large-v1') 2 3 # Tokenize sentences 4 encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt') 5

~\anaconda3\envs\tf_env\lib\site-packages\transformers\models\auto\auto_factory.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs) 445 elif type(config) in cls._model_mapping.keys(): 446 model_class = _get_model_class(config, cls._model_mapping) --> 447 return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs) 448 raise ValueError( 449 f"Unrecognized configuration class {config.class} for this kind of AutoModel: {cls.name}.\n"

~\anaconda3\envs\tf_env\lib\site-packages\transformers\modeling_utils.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs) 1437 try: 1438 with open(resolved_archive_file) as f: -> 1439 if f.read().startswith("version"): 1440 raise OSError( 1441
"You seem to have cloned a repository without having git-lfs installed. Please install "

~\anaconda3\envs\tf_env\lib\encodings\cp1252.py in decode(self, input, final) 21 class IncrementalDecoder(codecs.IncrementalDecoder): 22 def decode(self, input, final=False): ---> 23 return codecs.charmap_decode(input,self.errors,decoding_table)[0] 24 25 class StreamWriter(Codec,codecs.StreamWriter):

MemoryError:



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source