'RuntimeError on windows trying python multiprocessing encoding

I'm trying to run the python multiprocessing library to speed up encoding of csv file. However I run into this error:

RuntimeError: 
        An attempt has been made to start a new process before the
        current process has finished its bootstrapping phase.

        This probably means that you are not using fork to start your
        child processes and you have forgotten to use the proper idiom
        in the main module:

            if __name__ == '__main__':
                freeze_support()
                ...

        The "freeze_support()" line can be omitted if the program
        is not going to be frozen to produce an executable.

I did create

Search = SemanticSearch(model_path, data_path, query)
if __name__ == '__main__':

    query, flat, top_results = Search.search()

That points to the function in my class,

def setup(self):
        with open(self.data_path, newline='') as f:  # read and sort data
            reader = csv.reader(f)
            data1 = list(reader)
        self.corpus = [x for sublist in data1 for x in sublist]  # turn into 1D list
        #SemanticSearch.encode(self)
        self.texts_encodings = self.map(self.encode, self.corpus)
        end = time.time()
        print(end - self.start)


    def encode(self):
        self.start = time.time()
        return self.model.encode(self.corpus, convert_to_tensor=True, show_progress_bar=True)

In my init function I did call and set

self.map = Pool().map

Any tips something I'm missing? Thanks in advance

EDIT

class SemanticSearch(object):
   def __init__(self, model, data, query):
       self.query = query
       self.model = SentenceTransformer(model)  ### Model location
       self.data_path = data  ###path to csv 
       self.corpus = None
       self.texts_encodings = None
       self.start = None
       self.map = Pool().map

   def setup(self):
       print('here')
       with open(self.data_path, newline='') as f:  # read and sort data
           reader = csv.reader(f)
           data1 = list(reader)
       self.corpus = [x for sublist in data1 for x in sublist]  # turn into 1D list
       # SemanticSearch.encode(self)
       self.texts_encodings = self.map(self.encode, self.corpus)
       # SemanticSearch.encode(self)
       end = time.time()
       print(end - self.start)

   def encode(self):
       self.start = time.time()
       return self.model.encode(self.corpus, convert_to_tensor=True,
                                show_progress_bar=True)  ##encode to invisible layer

   def search(self):
       SemanticSearch.setup(self)


if __name__ == "__main__":
   model_path = r'data\BERT_MODELS\fine-tuned\multi-qa-MiniLM-L6-cos-v1'
   data_path = 'data/raw_data/Jira-2_14_2022.csv'
   query = 'query'

   Search = SemanticSearch(model_path, data_path, query)

   query, flat, top_results = Search.search()

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source

'RuntimeError on windows trying python multiprocessing encoding

Sources

Related Questions