I am trying to run a function in parallel, but it gets stuck and runs indefinitely until I forcefully interrupt the process.

When I run this function sequentially with a for-loop, it works just fine, but doing it in parallel with multiprocessing makes it run continuously without stopping. I have tried both the Pool and Process methods, but both exhibit the same problem, and I can't find where I'm getting it mixed up.

import numpy as np
import pandas as pd
import time
import multiprocessing as mp
import io

from google.colab import drive
# Colab-only: mounts the user's Google Drive at /content/drive (prompts for auth).
drive.mount('/content/drive')
from google.colab import files
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Colab-only: prompt the user to upload files, then load the heart dataset
# into a module-level DataFrame that kmeans() reads.
uploaded = files.upload()
data = pd.read_csv(io.BytesIO(uploaded['heart.csv']))

#### SEQUENTIAL PROGRAM - This part runs smoothly
            
s1 = time.time()  # wall-clock start of the sequential run
            
def kmeans(n):
  """Fit a KMeans model with `n` clusters on the globally loaded `data`.

  One-hot encodes the categorical columns, standardises every feature,
  then fits KMeans(random_state=1). Returns a one-element list holding
  the fitted model's inertia.
  """
  features = StandardScaler().fit_transform(pd.get_dummies(data, drop_first=True))
  model = KMeans(n_clusters=n, random_state=1).fit(features)
  print(f"Process {n} is executing")
  return [model.inertia_]
            
# Fit one model per candidate cluster count, strictly one after another.
for n_clusters in range(1, 50):
  kmeans(n_clusters)

s2 = time.time()  # wall-clock end of the sequential run

print(f"took {s2-s1}s to finish")
        
### Using Multiprocessing- Process() - Runs continuously without stopping
        
# NOTE(review): two fixes here. (1) The last three lines of the original were
# indented by one stray space, which is a top-level IndentationError.
# (2) Process-spawning code must be guarded by `if __name__ == '__main__':` —
# under the "spawn" start method (Windows, and notebook environments such as
# Colab) each child re-executes the module top level, so an unguarded loop
# keeps creating new children and the run never finishes. This is the usual
# cause of the "runs continuously without stopping" symptom.
if __name__ == '__main__':
  tic2 = time.time()

  # Launch one worker per cluster count; each runs kmeans(i) in its own process.
  process_list = []
  for i in range(1, 50):
    p = mp.Process(target=kmeans, args=[i])
    p.start()
    process_list.append(p)

  # Block until every worker has exited before stopping the clock.
  for process in process_list:
    process.join()

  toc2 = time.time()

  print('Done in {:.4f} seconds'.format(toc2-tic2))
    
    ### Using Multiprocessing- Pool() - Runs continuously without stopping
    
 tic1 = time.time()
    
 pool = mp.Pool()
 pool.map(kmeans, range(1,50))
 pool.close()
    
 toc1 = time.time()
    
 print('Done in {:.4f} seconds'.format(toc1-tic1))


Solution 1:[1]

Apart from the nonsensical data, this minor variation on your code runs perfectly (if you ignore the exceptions arising from the synthetic data).

Perhaps this will help you to see where you've gone wrong.

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import pandas as pd
from concurrent.futures import ProcessPoolExecutor
import numpy as np
import warnings

def kmeans(n):
    """Fit KMeans with `n` clusters on a tiny synthetic dataset.

    Returns a list holding the fitted model's inertia, or an empty list
    when the fit fails (expected for most `n` with this toy data).
    """
    data = ['John', 'Paul', 'George', 'Ringo']
    inertia_values = []
    print(f"Process {n} is executing")
    try:
        encoded = pd.get_dummies(data, drop_first=True)
        scaled = StandardScaler().fit_transform(encoded)
        fitted = KMeans(n_clusters=n, random_state=1).fit(scaled)
        inertia_values.append(fitted.inertia_)
    except Exception:
        # Deliberately swallowed: the synthetic data makes many fits raise,
        # and the demo only cares that the processes start and finish.
        pass
    print(f"Process {n} is ending")
    return inertia_values


def main():
    """Silence sklearn warnings, then fan kmeans(0..49) out over a process pool."""
    warnings.filterwarnings('ignore')
    # The with-block waits for every submitted task before returning.
    with ProcessPoolExecutor() as pool:
        pool.map(kmeans, list(range(50)))


# Guarding the entry point is what lets spawn-based platforms import this
# module in each worker without re-launching the pool — the fix for the
# question's infinite run.
if __name__ == '__main__':
    main()

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1