I am trying to run a function in parallel, but it gets stuck and runs indefinitely until I forcefully interrupt the process.
When I run this function sequentially with a for-loop, it works just fine, but running it in parallel with multiprocessing makes it run continuously without stopping. I have tried both the Pool and Process approaches, but both give the same problem, and I can't find where I am getting it mixed up.
import numpy as np
import pandas as pd
import time
import multiprocessing as mp
import io
from google.colab import drive
# Mount Google Drive at /content/drive (only works inside Colab).
# NOTE(review): nothing in the visible code reads from the mounted drive; the
# CSV is obtained through files.upload() instead. Confirm the mount is needed.
drive.mount('/content/drive')
from google.colab import files
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
# Ask the user to upload a file through the Colab widget; the result is
# indexed by uploaded file name and yields that file's raw bytes.
up = files.upload()
heart_bytes = up['heart.csv']
# Parse the uploaded bytes as CSV into the DataFrame used by kmeans().
data = pd.read_csv(io.BytesIO(heart_bytes))
#### SEQUENTIAL PROGRAM - This part runs smoothly
# Wall-clock start of the sequential run; s2 - s1 is printed after the loop.
s1 = time.time()
def kmeans(n):
    """Fit a k-means model with ``n`` clusters on the module-level ``data``.

    ``data`` is one-hot encoded with ``pd.get_dummies(..., drop_first=True)``
    and standardised with a fresh ``StandardScaler`` on every call before the
    model is fitted.

    Args:
        n: Number of clusters, passed to ``KMeans(n_clusters=n)``.

    Returns:
        A one-element list holding the fitted model's ``inertia_``.
    """
    encoded = pd.get_dummies(data, drop_first = True)
    X = StandardScaler().fit_transform(encoded)
    model = KMeans(n_clusters=n, random_state=1).fit(X)
    ss = [model.inertia_]
    # This message is emitted only after the fit has completed.
    print(f"Process {n} is executing")
    return ss
# Fit one model per cluster count from 1 to 49, one after another.
for i in range(1, 50):
    kmeans(i)

# Report the wall-clock time elapsed since s1 was recorded.
s2 = time.time()
print(f"took {s2-s1}s to finish")
### Using Multiprocessing- Process() - Runs continuously without stopping
tic2 = time.time()

# One dedicated process per cluster count (49 in total), all started before
# any of them is waited on.
# NOTE(review): if this never returns when run after the sequential loop in
# the same interpreter, one possible cause is creating child processes after
# the parent has already used scikit-learn's native threading - TODO confirm.
process_list = [mp.Process(target=kmeans, args=[i]) for i in range(1, 50)]
for p in process_list:
    p.start()

# Block until every child process has exited.
for process in process_list:
    process.join()

toc2 = time.time()
print('Done in {:.4f} seconds'.format(toc2-tic2))
### Using Multiprocessing- Pool() - Runs continuously without stopping
tic1 = time.time()

# Using the pool as a context manager guarantees the worker processes are
# torn down when the block exits, even if one of the tasks raises.
# NOTE(review): if this never returns when run after the sequential loop in
# the same interpreter, one possible cause is creating worker processes after
# the parent has already used scikit-learn's native threading - TODO confirm.
with mp.Pool() as pool:
    # map() blocks until every task has finished and preserves input order,
    # so pool_results[k] is the list returned by kmeans(k + 1).
    pool_results = pool.map(kmeans, range(1,50))

toc1 = time.time()
print('Done in {:.4f} seconds'.format(toc1-tic1))
Solution 1:[1]
Apart from the nonsensical data, this minor variation on your code runs perfectly (if you ignore the exceptions arising from the synthetic data).
Perhaps this will help you to see where you've gone wrong.
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import pandas as pd
from concurrent.futures import ProcessPoolExecutor
import numpy as np
import warnings
def kmeans(n):
    """Fit k-means with ``n`` clusters on a tiny synthetic data set.

    The four hard-coded names are one-hot encoded, standardised and
    clustered. Any failure is reported on stdout rather than silently
    discarded, and the function still returns normally so that a batch of
    runs keeps going.

    Args:
        n: Number of clusters, passed to ``KMeans(n_clusters=n)``.

    Returns:
        A one-element list holding the fitted model's ``inertia_``, or an
        empty list if any step raised.
    """
    data = ['John', 'Paul', 'George', 'Ringo']
    ss = []
    print(f"Process {n} is executing")
    try:
        sc = StandardScaler()
        X = sc.fit_transform(pd.get_dummies(data, drop_first=True))
        km = KMeans(n_clusters=n, random_state=1).fit(X)
        ss.append(km.inertia_)
    except Exception as exc:
        # Deliberately best-effort: the synthetic data cannot support every
        # cluster count, so a failed fit must not abort the run. Say what
        # went wrong instead of hiding it.
        print(f"Process {n} failed: {exc!r}")
    print(f"Process {n} is ending")
    return ss
def main():
    """Run ``kmeans`` for cluster counts 1 to 49 in a pool of worker processes.

    Returns:
        A list with one entry per cluster count, in ascending order; each
        entry is the list returned by ``kmeans`` for that count.
    """
    warnings.filterwarnings('ignore')
    with ProcessPoolExecutor() as executor:
        # Start at 1: a cluster count of 0 is never valid for KMeans.
        # Materialise the lazy map() iterator inside the with-block so the
        # results are collected and any exception raised in a worker
        # surfaces here instead of being dropped.
        return list(executor.map(kmeans, range(1, 50)))


# Only start the pool when executed as a script, so that a process which
# re-imports this module does not launch another pool.
if __name__ == '__main__':
    main()
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Stack Overflow |
