'BFS Parallel OpenMP low speed up
The following parallelized version of the BFS algorithm code on a graph gave me a speed up of 1.5 compared to the sequential version considering a fully connected graph with 5000 nodes, on a dual core 4 threads processor. I was wondering if it was possible to modify this algorithm to improve parallelization. Furthermore, with the code shown below I cannot consider a graph of over 10000 nodes which is returned to me as a segmentation fault and I would like to know if it is possible to solve this problem to test the code on a larger graph. Below is the code for the bfs and the main.
MAIN:
#include "bfs.h"
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
/**
* Conditional compilation to calculate the time with omp_get_wtime()
* if you are using openMP or with the macros defined in the bfs.h if you are not using open MP.
*/
#ifdef _OPENMP
#include <omp.h>
#else
#define omp_get_wtime() 0
#endif
int main(int argc, char const *argv[])
{
double bfs_time = 0.0, initialization_time = 0.0;
int n, threads;
double start_time_init, end_time_init, start_time_bfs, end_time_bfs;
if (argc < 3)
{
printf("ERROR! Usage: ./main dim threads\n");
exit(1);
}
n = atoi(argv[1]);
threads = atoi(argv[2]);
STARTTIME(1);
start_time_init = omp_get_wtime();
struct node *nodes = (struct node*)malloc(n*sizeof(struct node));
init_structures(n, nodes);
ENDTIME(1, initialization_time);
end_time_init = omp_get_wtime();
STARTTIME(2);
start_time_bfs = omp_get_wtime();
bool *visited = (bool*)malloc(n);
parallel_bfs(n, nodes, 0, visited, threads);
ENDTIME(2, bfs_time);
end_time_bfs = omp_get_wtime();
if (threads != 0){
initialization_time = end_time_init - start_time_init;
bfs_time = end_time_bfs - start_time_bfs;
}
printf("%d;%d;%f;%f\n", n, threads, initialization_time, bfs_time);
free(visited);
free(nodes);
return 0;
}
BFS:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <time.h>
#include "bfs.h"
/**
* Conditional compilation to run the code with threads if you are using openMP
* or with 0 thread if you are not using open MP.
*/
#ifdef _OPENMP
#include <omp.h>
#else
#define get_thread_num() 0
#endif
void init_structures(int n, struct node *nodes) {
for (int i=0; i<n; i++)
{
nodes[i].num_neighbours=n-1;
nodes[i].next = (int*)malloc(nodes[i].num_neighbours*sizeof(int));
for (int j=0; j<nodes[i].num_neighbours; j++)
{
if (j!=i) {
nodes[i].next[j]=j; }
}
}
}
int visit_check(int n, bool *visited) // funzione per verificare quale nodo è stato visitato oppure no
{
for (int i=0; i<n; i++)
{
if (visited[i]==false)
{
return i;
}
}
return -1;
}
void parallel_bfs(int n, struct node *nodes, int start, bool *visited, int threads)
{
int *level = (int*)malloc(n*sizeof(int));
visited[start] = true;
level[start] = 0;
int *q[2];
q[0] = (int*)malloc(n*sizeof(int));
q[1] = (int*)malloc(n*sizeof(int));
q[0][0] = start;
int len[2];
len[0] = 1;
int next_queue, current_queue = 0;
int current_node;
while(1)
{
current_node = q[current_queue][0];
next_queue = (current_queue + 1)%2;
len[next_queue] = 0;
#pragma omp parallel for private(current_node) num_threads(threads)
for (int i=0; i<len[current_queue]; i++)
{
current_node = q[current_queue][i];
for (int j=0; j<nodes[current_node].num_neighbours; j++)
{
if (!visited[nodes[current_node].next[j]])
{
visited[nodes[current_node].next[j]] = true;
level[nodes[current_node].next[j]] = level[current_node]+1;
#pragma omp critical
q[next_queue][len[next_queue]++] = nodes[current_node].next[j];
}
}
}
if (len[next_queue]==0)
break;
else
current_queue = next_queue;
}
free(level);
free(q[0]);
free(q[1]);
}
Thanks a lot for your help.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
